Пример #1
0
/*!
 * \brief   pixColorSegment()
 *
 * \param[in]    pixs  32 bpp; 24-bit color
 * \param[in]    maxdist max euclidean dist to existing cluster
 * \param[in]    maxcolors max number of colors allowed in first pass
 * \param[in]    selsize linear size of sel for closing to remove noise
 * \param[in]    finalcolors max number of final colors allowed after 4th pass
 * \param[in]    debugflag  1 for debug output; 0 otherwise
 * \return  pixd 8 bit with colormap, or NULL on error
 *
 * <pre>
 *  Color segmentation proceeds in four phases:
 *
 *  Phase 1:  pixColorSegmentCluster()
 *  The image is traversed in raster order.  Each pixel either
 *  becomes the representative for a new cluster or is assigned to an
 *  existing cluster.  Assignment is greedy.  The data is stored in
 *  a colormapped image.  Three auxiliary arrays are used to hold
 *  the colors of the representative pixels, for fast lookup.
 *  The average color in each cluster is computed.
 *
 *  Phase 2.  pixAssignToNearestColor()
 *  A second non-greedy clustering pass is performed, where each pixel
 *  is assigned to the nearest cluster average.  We also keep track
 *  of how many pixels are assigned to each cluster.
 *
 *  Phase 3.  pixColorSegmentClean()
 *  For each cluster, starting with the largest, do a morphological
 *  closing to eliminate small components within larger ones.
 *
 *  Phase 4.  pixColorSegmentRemoveColors()
 *  Eliminate all colors except the most populated 'finalcolors'.
 *  Then remove unused colors from the colormap, and reassign those
 *  pixels to the nearest remaining cluster, using the original pixel values.
 *
 * Notes:
 *      (1) The goal is to generate a small number of colors.
 *          Typically this would be specified by 'finalcolors',
 *          a number that would be somewhere between 3 and 6.
 *          The parameter 'maxcolors' specifies the maximum number of
 *          colors generated in the first phase.  This should be
 *          larger than finalcolors, perhaps twice as large.
 *          If more than 'maxcolors' are generated in the first phase
 *          using the input 'maxdist', the distance is repeatedly
 *          increased by a multiplicative factor until the condition
 *          is satisfied.  The implicit relation between 'maxdist'
 *          and 'maxcolors' is thus adjusted programmatically.
 *      (2) As a very rough guideline, given a target value of 'finalcolors',
 *          here are approximate values of 'maxdist' and 'maxcolors'
 *          to start with:
 *
 *               finalcolors    maxcolors    maxdist
 *               -----------    ---------    -------
 *                   3             6          100
 *                   4             8           90
 *                   5            10           75
 *                   6            12           60
 *
 *          For a given number of finalcolors, if you use too many
 *          maxcolors, the result will be noisy.  If you use too few,
 *          the result will be a relatively poor assignment of colors.
 * </pre>
 */
PIX *
pixColorSegment(PIX     *pixs,
                l_int32  maxdist,
                l_int32  maxcolors,
                l_int32  selsize,
                l_int32  finalcolors,
                l_int32  debugflag)
{
l_int32   *countarray;
PIX       *pixd;

    PROCNAME("pixColorSegment");

    if (!pixs)
        return (PIX *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 32)
        return (PIX *)ERROR_PTR("must be rgb color", procName, NULL);

        /* Phase 1; original segmentation */
    pixd = pixColorSegmentCluster(pixs, maxdist, maxcolors, debugflag);
    if (!pixd)
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    if (debugflag) {
        lept_mkdir("lept/segment");
        pixWriteDebug("/tmp/lept/segment/colorseg1.png", pixd, IFF_PNG);
    }

        /* Phase 2; refinement in pixel assignment */
    if ((countarray = (l_int32 *)LEPT_CALLOC(256, sizeof(l_int32))) == NULL) {
        pixDestroy(&pixd);
        return (PIX *)ERROR_PTR("countarray not made", procName, NULL);
    }
    pixAssignToNearestColor(pixd, pixs, NULL, LEVEL_IN_OCTCUBE, countarray);
    if (debugflag)
        pixWriteDebug("/tmp/lept/segment/colorseg2.png", pixd, IFF_PNG);

        /* Phase 3: noise removal by separately closing each color */
    pixColorSegmentClean(pixd, selsize, countarray);
    LEPT_FREE(countarray);
    if (debugflag)
        pixWriteDebug("/tmp/lept/segment/colorseg3.png", pixd, IFF_PNG);

        /* Phase 4: removal of colors with small population and
         * reassignment of pixels to remaining colors */
    pixColorSegmentRemoveColors(pixd, pixs, finalcolors);
    return pixd;
}
Пример #2
0
/*
 *  pixDebugFlipDetect()
 *
 *      Input:  filename (for output debug file)
 *              pixs (input to pix*Detect)
 *              pixhm (hit-miss result from ascenders or descenders)
 *              enable (1 to enable this function; 0 to disable)
 *      Return: void
 */
static void
pixDebugFlipDetect(const char *filename,
                   PIX        *pixs,
                   PIX        *pixhm,
                   l_int32     enable)
{
PIX  *pixt, *pixthm;

   if (!enable) return;

        /* Display with red dot at counted locations */
    pixt = pixConvert1To4Cmap(pixs);
    pixthm = pixMorphSequence(pixhm, "d5.5", 0);
    pixSetMaskedCmap(pixt, pixthm, 0, 0, 255, 0, 0);

    pixWriteDebug(filename, pixt, IFF_PNG);
    pixDestroy(&pixthm);
    pixDestroy(&pixt);
    return;
}
Пример #3
0
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Close up the text characters, removing noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* Estimate the resolution */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MIN_DIST_IN_PEAK;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no compnents after filtering\n", procName);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
      nloc = numaGetCount(naloc);
      nbox = boxaGetCount(boxa3);
      for (i = 0; i < nbox; i++) {
          boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
          for (j = 0; j < nloc; j++) {
              numaGetIValue(naloc, j, &locval);
              if (L_ABS(locval - (by + bh)) > 25)
                  continue;
              ptaAddPt(pta, bx, locval);
              ptaAddPt(pta, bx + bw, locval);
              break;
          }
      }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWriteDebug("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}
Пример #4
0
/*!
 * \brief   pixUpDownDetectGeneralDwa()
 *
 * \param[in]    pixs 1 bpp, deskewed, English text
 * \param[out]   pconf confidence that text is rightside-up
 * \param[in]    mincount min number of up + down; use 0 for default
 * \param[in]    npixels number of pixels removed from each side of word box
 * \param[in]    debug 1 for debug output; 0 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) See the notes in pixUpDownDetectGeneral() for usage.
 * </pre>
 */
l_int32
pixUpDownDetectGeneralDwa(PIX        *pixs,
                          l_float32  *pconf,
                          l_int32     mincount,
                          l_int32     npixels,
                          l_int32     debug)
{
char       flipsel1[] = "flipsel1";
char       flipsel2[] = "flipsel2";
char       flipsel3[] = "flipsel3";
char       flipsel4[] = "flipsel4";
l_int32    countup, countdown, nmax;
l_float32  nup, ndown;
PIX       *pixt, *pix0, *pix1, *pix2, *pix3, *pixm;

    PROCNAME("pixUpDownDetectGeneralDwa");

    if (!pconf)
        return ERROR_INT("&conf not defined", procName, 1);
    *pconf = 0.0;
    if (!pixs || pixGetDepth(pixs) != 1)
        return ERROR_INT("pixs not defined or not 1 bpp", procName, 1);
    if (mincount == 0)
        mincount = DEFAULT_MIN_UP_DOWN_COUNT;
    if (npixels < 0)
        npixels = 0;

    lept_mkdir("lept/orient");

        /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1).
         * This closes holes in x-height characters and joins them at
         * the x-height.  There is more noise in the descender detection
         * from this, but it works fairly well. */
    pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0);

        /* Be sure to add the border before the flip DWA operations! */
    pix0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER,
                                ADDED_BORDER, ADDED_BORDER, 0);
    pixDestroy(&pixt);

        /* Optionally, make a mask of the word bounding boxes, shortening
         * each of them by a fixed amount at each end. */
    pixm = NULL;
    if (npixels > 0) {
        l_int32  i, nbox, x, y, w, h;
        BOX   *box;
        BOXA  *boxa;
        pix1 = pixMorphSequenceDwa(pix0, "o10.1", 0);
        boxa = pixConnComp(pix1, NULL, 8);
        pixm = pixCreateTemplate(pix1);
        pixDestroy(&pix1);
        nbox = boxaGetCount(boxa);
        for (i = 0; i < nbox; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            boxGetGeometry(box, &x, &y, &w, &h);
            if (w > 2 * npixels)
                pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13,
                            PIX_SET, NULL, 0, 0);
            boxDestroy(&box);
        }
        boxaDestroy(&boxa);
    }

        /* Find the ascenders and optionally filter with pixm.
         * For an explanation of the procedure used for counting the result
         * of the HMT, see comments in pixUpDownDetectGeneral().  */
    pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1);
    pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countup, NULL);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Find the ascenders and optionally filter with pixm. */
    pix1 = pixFlipFHMTGen(NULL, pix0, flipsel3);
    pix2 = pixFlipFHMTGen(NULL, pix0, flipsel4);
    pixOr(pix1, pix1, pix2);
    if (pixm)
        pixAnd(pix1, pix1, pixm);
    pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0);
    pixCountPixels(pix3, &countdown, NULL);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);

        /* Evaluate statistically, generating a confidence that is
         * related to the probability with a gaussian distribution. */
    nup = (l_float32)(countup);
    ndown = (l_float32)(countdown);
    nmax = L_MAX(countup, countdown);
    if (nmax > mincount)
        *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown));

    if (debug) {
        if (pixm) pixWriteDebug("/tmp/lept/orient/pixm2.png", pixm, IFF_PNG);
        fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n",
                nup, ndown, *pconf);
        if (*pconf > DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is rightside-up\n");
        if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF)
            fprintf(stderr, "Text is upside-down\n");
    }

    pixDestroy(&pix0);
    pixDestroy(&pixm);
    return 0;
}