Пример #1
0
/*!
 * \brief   boxaMakeWHRatioIndicator()
 *
 * \param[in]    boxa
 * \param[in]    ratio     width/height threshold value
 * \param[in]    relation  L_SELECT_IF_LT, L_SELECT_IF_GT,
 *                         L_SELECT_IF_LTE, L_SELECT_IF_GTE
 * \return  na indicator array, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) To keep narrow components, use relation = L_SELECT_IF_LT or
 *          L_SELECT_IF_LTE.
 *          To keep wide components, use relation = L_SELECT_IF_GT or
 *          L_SELECT_IF_GTE.
 * </pre>
 */
NUMA *
boxaMakeWHRatioIndicator(BOXA      *boxa,
                         l_float32  ratio,
                         l_int32    relation)
{
l_int32    i, n, w, h, ival;
l_float32  whratio;
NUMA      *na;

    PROCNAME("boxaMakeWHRatioIndicator");

    if (!boxa)
        return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL);
    if ((n = boxaGetCount(boxa)) == 0)
        return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL);
    if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT &&
        relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE)
        return (NUMA *)ERROR_PTR("invalid relation", procName, NULL);

    na = numaCreate(n);
    for (i = 0; i < n; i++) {
        ival = 0;
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        whratio = (l_float32)w / (l_float32)h;

        if ((relation == L_SELECT_IF_LT && whratio < ratio) ||
            (relation == L_SELECT_IF_GT && whratio > ratio) ||
            (relation == L_SELECT_IF_LTE && whratio <= ratio) ||
            (relation == L_SELECT_IF_GTE && whratio >= ratio))
            ival = 1;
        numaAddNumber(na, ival);
    }

    return na;
}
Пример #2
0
/*!
 * \brief   boxaMakeAreaIndicator()
 *
 * \param[in]    boxa
 * \param[in]    area       threshold value of width * height
 * \param[in]    relation   L_SELECT_IF_LT, L_SELECT_IF_GT,
 *                          L_SELECT_IF_LTE, L_SELECT_IF_GTE
 * \return  na indicator array, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) To keep small components, use relation = L_SELECT_IF_LT or
 *          L_SELECT_IF_LTE.
 *          To keep large components, use relation = L_SELECT_IF_GT or
 *          L_SELECT_IF_GTE.
 * </pre>
 */
NUMA *
boxaMakeAreaIndicator(BOXA     *boxa,
                      l_int32   area,
                      l_int32   relation)
{
l_int32  i, n, w, h, ival;
NUMA    *na;

    PROCNAME("boxaMakeAreaIndicator");

    if (!boxa)
        return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL);
    if ((n = boxaGetCount(boxa)) == 0)
        return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL);
    if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT &&
        relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE)
        return (NUMA *)ERROR_PTR("invalid relation", procName, NULL);

    na = numaCreate(n);
    for (i = 0; i < n; i++) {
        ival = 0;
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);

        if ((relation == L_SELECT_IF_LT && w * h < area) ||
            (relation == L_SELECT_IF_GT && w * h > area) ||
            (relation == L_SELECT_IF_LTE && w * h <= area) ||
            (relation == L_SELECT_IF_GTE && w * h >= area))
            ival = 1;
        numaAddNumber(na, ival);
    }

    return na;
}
Пример #3
0
// Converts the Boxa array to a list of C_BLOB, getting rid of severely
// overlapping outlines and those that are children of a bigger one.
// The output is a list of C_BLOBs that are owned by the list.
// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
// bounding boxes. The Boxa is consumed and destroyed.
void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
                                    Boxa** boxes, C_BLOB_LIST* blobs) {
#ifdef HAVE_LIBLEPT
  C_OUTLINE_LIST outlines;
  C_OUTLINE_IT ol_it = &outlines;
  // Iterate the boxes to convert to outlines.
  int nboxes = boxaGetCount(*boxes);
  for (int i = 0; i < nboxes; ++i) {
    l_int32 x, y, width, height;
    boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
    // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
    // as there is no outline, just a bounding box, but with some very
    // small changes to coutln.cpp, it works nicely.
    ICOORD top_left(x, image_height - y);
    ICOORD bot_right(x + width, image_height - (y + height));
    CRACKEDGE startpt;
    startpt.pos = top_left;
    C_OUTLINE* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
    ol_it.add_after_then_move(outline);
  }
  // Use outlines_to_blobs to convert the outlines to blobs and find
  // overlapping and contained objects. The output list of blobs in the block
  // has all the bad ones filtered out and deleted.
  BLOCK block;
  ICOORD page_tl(0, 0);
  ICOORD page_br(image_width, image_height);
  outlines_to_blobs(&block, page_tl, page_br, &outlines);
  // Transfer the created blobs to the output list.
  C_BLOB_IT blob_it(blobs);
  blob_it.add_list_after(block.blob_list());
  // The boxes aren't needed any more.
  boxaDestroy(boxes);
#endif
}
Пример #4
0
/*!
 *  boxaConvertToPta()
 *
 *      Input:  boxa 
 *              ncorners (2 or 4 for the representation of each box)
 *      Return: pta (with @ncorners points for each box in the boxa),
 *                   or null on error
 *
 *  Notes:
 *      (1) If ncorners == 2, we select the UL and LR corners.
 *          Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
 */
PTA *
boxaConvertToPta(BOXA    *boxa,
                 l_int32  ncorners)
{
l_int32  i, n, x, y, w, h;
PTA     *pta;

    PROCNAME("boxaConvertToPta");

    if (!boxa)
        return (PTA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (ncorners != 2 && ncorners != 4)
        return (PTA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL);

    n = boxaGetCount(boxa);
    if ((pta = ptaCreate(n)) == NULL)
        return (PTA *)ERROR_PTR("pta not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h);
        ptaAddPt(pta, x, y);
        if (ncorners == 2) 
            ptaAddPt(pta, x + w - 1, y + h - 1);
        else {
            ptaAddPt(pta, x + w - 1, y);
            ptaAddPt(pta, x, y + h - 1);
            ptaAddPt(pta, x + w - 1, y + h - 1);
        }
    }

    return pta;
}
Пример #5
0
/*!
 *  boxaaGetExtent()
 *
 *      Input:  boxaa
 *              &w  (<optional return> width)
 *              &h  (<optional return> height)
 *              &box (<optional return>, minimum box containing all boxa
 *                    in boxaa)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The returned w and h are the minimum size image
 *          that would contain all boxes untranslated.
 */
l_int32
boxaaGetExtent(BOXAA    *boxaa,
               l_int32  *pw,
               l_int32  *ph,
               BOX     **pbox)
{
l_int32  i, j, n, m, x, y, w, h, xmax, ymax, xmin, ymin, found;
BOXA    *boxa;

    PROCNAME("boxaaGetExtent");

    if (!pw && !ph && !pbox)
        return ERROR_INT("no ptrs defined", procName, 1);
    if (pbox) *pbox = NULL;
    if (pw) *pw = 0;
    if (ph) *ph = 0;
    if (!boxaa)
        return ERROR_INT("boxaa not defined", procName, 1);

    n = boxaaGetCount(boxaa);
    if (n == 0)
        return ERROR_INT("no boxa in boxaa", procName, 1);

    xmax = ymax = 0;
    xmin = ymin = 100000000;
    found = FALSE;
    for (i = 0; i < n; i++) {
        boxa = boxaaGetBoxa(boxaa, i, L_CLONE);
        m = boxaGetCount(boxa);
        for (j = 0; j < m; j++) {
            boxaGetBoxGeometry(boxa, j, &x, &y, &w, &h);
            if (w <= 0 || h <= 0)
                continue;
            found = TRUE;
            xmin = L_MIN(xmin, x);
            ymin = L_MIN(ymin, y);
            xmax = L_MAX(xmax, x + w);
            ymax = L_MAX(ymax, y + h);
        }
    }
    if (!found)
        return ERROR_INT("no valid boxes in boxaa", procName, 1);
    if (pw) *pw = xmax;
    if (ph) *ph = ymax;
    if (pbox)
      *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);

    return 0;
}
Пример #6
0
/*!
 *  boxaaAlignBox()
 *
 *      Input:  boxaa
 *              box (to be aligned with the last of one of the boxa
 *                   in boxaa, if possible)
 *              delta (amount by which consecutive components can miss
 *                     in overlap and still be included in the array)
 *              &index (of boxa with best overlap, or if none match,
 *                      this is the index of the next boxa to be generated)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is not greedy; it finds the boxa whose last box has
 *          the biggest overlap with the input box.
 */
l_int32
boxaaAlignBox(BOXAA    *baa,
              BOX      *box,
              l_int32   delta,
              l_int32  *pindex)
{
l_int32  i, n, m, y, yt, h, ht, ovlp, maxovlp, maxindex;
BOXA    *boxa;

    PROCNAME("boxaaAlignBox");

    if (!baa)
        return ERROR_INT("baa not defined", procName, 1);
    if (!box)
        return ERROR_INT("box not defined", procName, 1);
    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);

    n = boxaaGetCount(baa);
    boxGetGeometry(box, NULL, &y, NULL, &h);
    maxovlp = -10000000;
    for (i = 0; i < n; i++) {
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
        if ((m = boxaGetCount(boxa)) == 0) {
            L_WARNING("no boxes in boxa", procName);
            continue;
        }
        boxaGetBoxGeometry(boxa, m - 1, NULL, &yt, NULL, &ht);  /* last one */
        boxaDestroy(&boxa);

            /* Overlap < 0 means the components do not overlap vertically */
        if (yt >= y)
            ovlp = y + h - 1 - yt;
        else
            ovlp = yt + ht - 1 - y;
        if (ovlp > maxovlp) {
            maxovlp = ovlp;
            maxindex = i;
        }
    }

    if (maxovlp + delta >= 0)
        *pindex = maxindex;
    else
        *pindex = n;
    return 0;
}
Пример #7
0
/*!
 *  boxaGetValidCount()
 *
 *      Input:  boxa
 *      Return: count (of valid boxes); 0 if no valid boxes or on error
 */
l_int32
boxaGetValidCount(BOXA  *boxa)
{
l_int32  n, i, w, h, count;

    PROCNAME("boxaGetValidCount");

    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 0);

    n = boxaGetCount(boxa);
    for (i = 0, count = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        if (w > 0 && h > 0)
            count++;
    }
    return count;
}
Пример #8
0
// Get a set of bounding boxes of possible horizontal lines in the image.
// The input resolution overrides any resolution set in src_pix.
// The output line_pix contains just all the detected lines.
// The output boxes undergo the transformation (x,y)->(height-y,x) so the
// lines can be found with a vertical line finder afterwards.
// This transformation allows a simple x/y flip to reverse it in tesseract
// coordinates and it is faster to flip the lines than rotate the image.
Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) {
#ifdef HAVE_LIBLEPT
  // Remove any parts of 1 inch/kThinLineFraction high or more, by opening
  // away the thin lines and subtracting what's left.
  // This is very generous and will leave in even quite wide lines.
  Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction);
  pixSubtract(pixt1, src_pix, pixt1);
  // Spread vertically to allow for some skew.
  Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3);
  // Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction.
  pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1);
  pixDestroy(&pixt2);
  // Put a single pixel crack in every line at an arbitrary spacing,
  // so they break up and the bounding boxes can be used to get the
  // direction accurately enough without needing outlines.
  int wpl = pixGetWpl(pixt1);
  int width = pixGetWidth(pixt1);
  int height = pixGetHeight(pixt1);
  l_uint32* data = pixGetData(pixt1);
  for (int y = 0; y < height; ++y, data += wpl) {
    for (int x = kCrackSpacing; x < width; x += kCrackSpacing) {
      CLEAR_DATA_BIT(data, x);
    }
  }
  if (textord_tabfind_show_vlines)
    pixWrite("hlines.png", pixt1, IFF_PNG);
  Boxa* boxa = pixConnComp(pixt1, NULL, 8);
  *line_pix = pixt1;

  // Iterate the boxes to flip x and y.
  int nboxes = boxaGetCount(boxa);
  for (int i = 0; i < nboxes; ++i) {
    l_int32 x, y, box_width, box_height;
    boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height);
    Box* box = boxCreate(height - (y + box_height),
                         width - (x + box_width), box_height, box_width);
    boxaReplaceBox(boxa, i, box);
  }
  return boxa;
#else
  return NULL;
#endif
}
Пример #9
0
/*!
 * \brief   boxaLocationRange()
 *
 * \param[in]    boxa
 * \param[out]   pminx    [optional] min (UL corner) x value of all boxes
 * \param[out]   pminy    [optional] min (UL corner) y value of all boxes
 * \param[out]   pmaxx    [optional] max (UL corner) x value of all boxes
 * \param[out]   pmaxy    [optional] max (UL corner) y value of all boxes
 * \return  0 if OK, 1 on error
 */
l_ok
boxaLocationRange(BOXA     *boxa,
                  l_int32  *pminx,
                  l_int32  *pminy,
                  l_int32  *pmaxx,
                  l_int32  *pmaxy)
{
l_int32  minx, miny, maxx, maxy, i, n, x, y;

    PROCNAME("boxaLocationRange");

    if (!pminx && !pminy && !pmaxx && !pmaxy)
        return ERROR_INT("no data can be returned", procName, 1);
    if (pminx) *pminx = 0;
    if (pminy) *pminy = 0;
    if (pmaxx) *pmaxx = 0;
    if (pmaxy) *pmaxy = 0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    minx = miny = 100000000;
    maxx = maxy = 0;
    n = boxaGetCount(boxa);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, NULL, NULL);
        if (x < minx)
            minx = x;
        if (y < miny)
            miny = y;
        if (x > maxx)
            maxx = x;
        if (y > maxy)
            maxy = y;
    }

    if (pminx) *pminx = minx;
    if (pminy) *pminy = miny;
    if (pmaxx) *pmaxx = maxx;
    if (pmaxy) *pmaxy = maxy;

    return 0;
}
Пример #10
0
/*!
 *  boxaExtractAsPta()
 *
 *      Input:  boxa
 *              &ptal (<optional return> array of left locations vs. index)
 *              &ptat (<optional return> array of top locations vs. index)
 *              &ptar (<optional return> array of right locations vs. index)
 *              &ptab (<optional return> array of bottom locations vs. index)
 *              keepinvalid (1 to keep invalid boxes; 0 to remove them)
 *      Return: 0 if OK, 1 on error
 */
l_int32
boxaExtractAsPta(BOXA    *boxa,
                 PTA    **pptal,
                 PTA    **pptat,
                 PTA    **pptar,
                 PTA    **pptab,
                 l_int32  keepinvalid)
{
l_int32  i, n, left, top, right, bot, w, h;

    PROCNAME("boxaExtractAsPta");

    if (!pptal && !pptar && !pptat && !pptab)
        return ERROR_INT("no output requested", procName, 1);
    if (pptal) *pptal = NULL;
    if (pptat) *pptat = NULL;
    if (pptar) *pptar = NULL;
    if (pptab) *pptab = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
        return ERROR_INT("no valid boxes", procName, 1);

    n = boxaGetCount(boxa);
    if (pptal) *pptal = ptaCreate(n);
    if (pptat) *pptat = ptaCreate(n);
    if (pptar) *pptar = ptaCreate(n);
    if (pptab) *pptab = ptaCreate(n);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &left, &top, &w, &h);
        if (!keepinvalid && (w <= 0 || h <= 0))
            continue;
        right = left + w - 1;
        bot = top + h - 1;
        if (pptal) ptaAddPt(*pptal, i, left);
        if (pptat) ptaAddPt(*pptat, i, top);
        if (pptar) ptaAddPt(*pptar, i, right);
        if (pptab) ptaAddPt(*pptab, i, bot);
    }

    return 0;
}
Пример #11
0
/*!
 * \brief   boxaGetExtent()
 *
 * \param[in]    boxa
 * \param[out]   pw      [optional] width
 * \param[out]   ph      [optional] height
 * \param[out]   pbox    [optional]  minimum box containing all boxes in boxa
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) This computes the minimum rectangular bounding region
 *          that contains all valid boxes in a boxa.
 *      (2) The returned w and h are the minimum size image
 *          that would contain all boxes untranslated.
 *      (3) If there are no valid boxes, returned w and h are 0 and
 *          all parameters in the returned box are 0.  This
 *          is not an error, because an empty boxa is valid and
 *          boxaGetExtent() is required for serialization.
 * </pre>
 */
l_ok
boxaGetExtent(BOXA     *boxa,
              l_int32  *pw,
              l_int32  *ph,
              BOX     **pbox)
{
l_int32  i, n, x, y, w, h, xmax, ymax, xmin, ymin, found;

    PROCNAME("boxaGetExtent");

    if (!pw && !ph && !pbox)
        return ERROR_INT("no ptrs defined", procName, 1);
    if (pw) *pw = 0;
    if (ph) *ph = 0;
    if (pbox) *pbox = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    n = boxaGetCount(boxa);
    xmax = ymax = 0;
    xmin = ymin = 100000000;
    found = FALSE;
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h);
        if (w <= 0 || h <= 0)
            continue;
        found = TRUE;
        xmin = L_MIN(xmin, x);
        ymin = L_MIN(ymin, y);
        xmax = L_MAX(xmax, x + w);
        ymax = L_MAX(ymax, y + h);
    }
    if (found == FALSE)  /* no valid boxes in boxa */
        xmin = ymin = 0;
    if (pw) *pw = xmax;
    if (ph) *ph = ymax;
    if (pbox)
      *pbox = boxCreate(xmin, ymin, xmax - xmin, ymax - ymin);

    return 0;
}
Пример #12
0
/*!
 * \brief   boxaSizeRange()
 *
 * \param[in]    boxa
 * \param[out]   pminw    [optional] min width of all boxes
 * \param[out]   pmaxw    [optional] max width of all boxes
 * \param[out]   pminh    [optional] min height of all boxes
 * \param[out]   pmaxh    [optional] max height of all boxes
 * \return  0 if OK, 1 on error
 */
l_ok
boxaSizeRange(BOXA     *boxa,
              l_int32  *pminw,
              l_int32  *pminh,
              l_int32  *pmaxw,
              l_int32  *pmaxh)
{
l_int32  minw, minh, maxw, maxh, i, n, w, h;

    PROCNAME("boxaSizeRange");

    if (!pminw && !pmaxw && !pminh && !pmaxh)
        return ERROR_INT("no data can be returned", procName, 1);
    if (pminw) *pminw = 0;
    if (pminh) *pminh = 0;
    if (pmaxw) *pmaxw = 0;
    if (pmaxh) *pmaxh = 0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    minw = minh = 100000000;
    maxw = maxh = 0;
    n = boxaGetCount(boxa);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        if (w < minw)
            minw = w;
        if (h < minh)
            minh = h;
        if (w > maxw)
            maxw = w;
        if (h > maxh)
            maxh = h;
    }

    if (pminw) *pminw = minw;
    if (pminh) *pminh = minh;
    if (pmaxw) *pmaxw = maxw;
    if (pmaxh) *pmaxh = maxh;
    return 0;
}
Пример #13
0
/*!
 * \brief   boxaGetArea()
 *
 * \param[in]    boxa
 * \param[out]   parea    total area of all boxes
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Measures the total area of the boxes, without regard to overlaps.
 * </pre>
 */
l_ok
boxaGetArea(BOXA     *boxa,
            l_int32  *parea)
{
l_int32  i, n, w, h;

    PROCNAME("boxaGetArea");

    if (!parea)
        return ERROR_INT("&area not defined", procName, 1);
    *parea = 0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    n = boxaGetCount(boxa);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        *parea += w * h;
    }
    return 0;
}
Пример #14
0
jboolean Java_com_googlecode_leptonica_android_Boxa_nativeGetGeometry(JNIEnv *env, jclass clazz,
                                                                      jlong nativeBoxa,
                                                                      jint index,
                                                                      jintArray dimensions) {
    BOXA *boxa = (BOXA *) nativeBoxa;
    jint *dimensionArray = env->GetIntArrayElements(dimensions, NULL);
    l_int32 x, y, w, h;

    if (boxaGetBoxGeometry(boxa, index, &x, &y, &w, &h)) {
        return JNI_FALSE;
    }

    dimensionArray[0] = x;
    dimensionArray[1] = y;
    dimensionArray[2] = w;
    dimensionArray[3] = h;

    env->ReleaseIntArrayElements(dimensions, dimensionArray, 0);

    return JNI_TRUE;
}
Пример #15
0
/*!
 *  boxaExtractAsNuma()
 *
 *      Input:  boxa
 *              &nax (<optional return> array of x locations)
 *              &nay (<optional return> array of y locations)
 *              &naw (<optional return> array of w locations)
 *              &nah (<optional return> array of h locations)
 *              keepinvalid (1 to keep invalid boxes; 0 to remove them)
 *      Return: 0 if OK, 1 on error
 */
l_int32
boxaExtractAsNuma(BOXA    *boxa,
                  NUMA   **pnax,
                  NUMA   **pnay,
                  NUMA   **pnaw,
                  NUMA   **pnah,
                  l_int32  keepinvalid)
{
l_int32  i, n, x, y, w, h;

    PROCNAME("boxaExtractAsNuma");

    if (!pnax && !pnay && !pnaw && !pnah)
        return ERROR_INT("no output requested", procName, 1);
    if (pnax) *pnax = NULL;
    if (pnay) *pnay = NULL;
    if (pnaw) *pnaw = NULL;
    if (pnah) *pnah = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!keepinvalid && boxaGetValidCount(boxa) == 0)
        return ERROR_INT("no valid boxes", procName, 1);

    n = boxaGetCount(boxa);
    if (pnax) *pnax = numaCreate(n);
    if (pnay) *pnay = numaCreate(n);
    if (pnaw) *pnaw = numaCreate(n);
    if (pnah) *pnah = numaCreate(n);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h);
        if (!keepinvalid && (w <= 0 || h <= 0))
            continue;
        if (pnax) numaAddNumber(*pnax, x);
        if (pnay) numaAddNumber(*pnay, y);
        if (pnaw) numaAddNumber(*pnaw, w);
        if (pnah) numaAddNumber(*pnah, h);
    }

    return 0;
}
Пример #16
0
/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Input binary image must have text lines already aligned
 *          horizontally.  This can be done by either rotating the
 *          image with pixDeskew(), or, if a projective transform
 *          is required, by doing pixDeskewLocal() first.
 *      (2) Input null for &pta if you don't want this returned.
 *          The pta will come in pairs of points (left and right end
 *          of each baseline).
 *      (3) Caution: this will not work properly on text with multiple
 *          columns, where the lines are not aligned between columns.
 *          If there are multiple columns, they should be extracted
 *          separately before finding the baselines.
 *      (4) This function constructs different types of output
 *          for baselines; namely, a set of raster line values and
 *          a set of end points of each baseline.
 *      (5) This function was designed to handle short and long text lines
 *          without using dangerous thresholds on the peak heights.  It does
 *          this by combining the differential signal with a morphological
 *          analysis of the locations of the text lines.  One can also
 *          combine this data to normalize the peak heights, by weighting
 *          the differential signal in the region of each baseline
 *          by the inverse of the width of the text line found there.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    h, i, j, nbox, val1, val2, ndiff, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, max, maxloc, nloc, locval;
l_int32   *array;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *nasum, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Close up the text characters, removing noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* Estimate the resolution */
    if (pixadb) pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Save the difference of adjacent row sums.
         * The high positive-going peaks are the baselines */
    if ((nasum = pixCountPixelsByRow(pix1, NULL)) == NULL) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    h = pixGetHeight(pixs);
    nadiff = numaCreate(h);
    numaGetIValue(nasum, 0, &val2);
    for (i = 0; i < h - 1; i++) {
        val1 = val2;
        numaGetIValue(nasum, i + 1, &val2);
        numaAddNumber(nadiff, val1 - val2);
    }
    numaDestroy(&nasum);

    if (pixadb) {  /* show the difference signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Use the zeroes of the profile to locate each baseline. */
    array = numaGetIArray(nadiff);
    ndiff = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* Use this to begin locating a new peak: */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
        /* Use this to begin a region between peaks: */
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (i = 0; i < ndiff; i++) {
        if (inpeak == FALSE) {
            if (array[i] > peakthresh) {  /* transition to in-peak */
                inpeak = TRUE;
                mintosearch = i + MIN_DIST_IN_PEAK; /* accept no zeros
                                               * between i and mintosearch */
                max = array[i];
                maxloc = i;
            }
        } else {  /* inpeak == TRUE; look for max */
            if (array[i] > max) {
                max = array[i];
                maxloc = i;
                mintosearch = i + MIN_DIST_IN_PEAK;
            } else if (i > mintosearch && array[i] <= zerothresh) {  /* leave */
                inpeak = FALSE;
                numaAddNumber(naval, max);
                numaAddNumber(naloc, maxloc);
            }
        }
    }
    LEPT_FREE(array);

        /* If array[ndiff-1] is max, eg. no descenders, baseline at bottom */
    if (inpeak) {
        numaAddNumber(naval, max);
        numaAddNumber(naloc, maxloc);
    }

    if (pixadb) {  /* show the raster locations for the peaks */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Generate an approximate profile of text line width.
         * First, filter the boxes of text, where there may be
         * more than one box for a given textline. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb) pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no compnents after filtering\n", procName);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* Optionally, find the baseline segments */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
      nloc = numaGetCount(naloc);
      nbox = boxaGetCount(boxa3);
      for (i = 0; i < nbox; i++) {
          boxaGetBoxGeometry(boxa3, i, &bx, &by, &bw, &bh);
          for (j = 0; j < nloc; j++) {
              numaGetIValue(naloc, j, &locval);
              if (L_ABS(locval - (by + bh)) > 25)
                  continue;
              ptaAddPt(pta, bx, locval);
              ptaAddPt(pta, bx + bw, locval);
              break;
          }
      }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* display baselines */
        l_int32  npts, x1, y1, x2, y2;
        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (i = 0; i < npts; i += 2) {
            ptaGetIPt(pta, i, &x1, &y1);
            ptaGetIPt(pta, i + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWrite("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}
Пример #17
0
/*!
 *  boxaSort()
 *
 *      Input:  boxa
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
 *                        L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION,
 *                        L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER,
 *                        L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *      Return: boxad (sorted version of boxas), or null on error
 */
BOXA *
boxaSort(BOXA    *boxas,
         l_int32  sorttype,
         l_int32  sortorder,
         NUMA   **pnaindex)
{
l_int32    i, n, x, y, w, h, size;
BOXA      *boxad;
NUMA      *na, *naindex;

    PROCNAME("boxaSort");

    if (pnaindex) *pnaindex = NULL;
    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_MIN_DIMENSION &&
        sorttype != L_SORT_BY_MAX_DIMENSION &&
        sorttype != L_SORT_BY_PERIMETER &&
        sorttype != L_SORT_BY_AREA &&
        sorttype != L_SORT_BY_ASPECT_RATIO)
        return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);

        /* Use O(n) binsort if possible */
    n = boxaGetCount(boxas);
    if (n > MIN_COMPS_FOR_BIN_SORT &&
        ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
         (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
         (sorttype == L_SORT_BY_PERIMETER)))
        return boxaBinSort(boxas, sorttype, sortorder, pnaindex);

        /* Build up numa of specific data */
    if ((na = numaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_MIN_DIMENSION:
            size = L_MIN(w, h);
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_MAX_DIMENSION:
            size = L_MAX(w, h);
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_PERIMETER:
            size = w + h;
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_AREA:
            size = w * h;
            numaAddNumber(na, size);
            break;
        case L_SORT_BY_ASPECT_RATIO:
            numaAddNumber(na, (l_float32)w / (l_float32)h);
            break;
        default:
            L_WARNING("invalid sort type", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetSortIndex(na, sortorder)) == NULL)
        return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted boxa using sort index */
    boxad = boxaSortByIndex(boxas, naindex);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return boxad;
}
Пример #18
0
/*!
 *  boxaBinSort()
 *
 *      Input:  boxa
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
 *                        L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *      Return: boxad (sorted version of boxas), or null on error
 *
 *  Notes:
 *      (1) For a large number of boxes (say, greater than 1000), this
 *          O(n) binsort is much faster than the O(nlogn) shellsort.
 *          For 5000 components, this is over 20x faster than boxaSort().
 *      (2) Consequently, boxaSort() calls this function if it will
 *          likely go much faster.
 */
BOXA *
boxaBinSort(BOXA    *boxas,
            l_int32  sorttype,
            l_int32  sortorder,
            NUMA   **pnaindex)
{
l_int32  i, n, x, y, w, h;
BOXA    *boxad;
NUMA    *na, *naindex;

    PROCNAME("boxaBinSort");

    if (pnaindex) *pnaindex = NULL;
    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y &&
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_PERIMETER)
        return (BOXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (BOXA *)ERROR_PTR("invalid sort order", procName, NULL);

        /* Generate Numa of appropriate box dimensions */
    n = boxaGetCount(boxas);
    if ((na = numaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxas, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_PERIMETER:
            numaAddNumber(na, w + h);
            break;
        default:
            L_WARNING("invalid sort type", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL)
        return (BOXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted boxa using the sort index */
    boxad = boxaSortByIndex(boxas, naindex);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return boxad;
}
Пример #19
0
/*!
 *  pixaBinSort()
 *
 *      Input:  pixas
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
 *                        L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *              copyflag (L_COPY, L_CLONE)
 *      Return: pixad (sorted version of pixas), or null on error
 *
 *  Notes:
 *      (1) This sorts based on the data in the boxa.  If the boxa
 *          count is not the same as the pixa count, this returns an error.
 *      (2) The copyflag refers to the pix and box copies that are
 *          inserted into the sorted pixa.  These are either L_COPY
 *          or L_CLONE.
 *      (3) For a large number of boxes (say, greater than 1000), this
 *          O(n) binsort is much faster than the O(nlogn) shellsort.
 *          For 5000 components, this is over 20x faster than boxaSort().
 *      (4) Consequently, pixaSort() calls this function if it will
 *          likely go much faster.
 */
PIXA *
pixaBinSort(PIXA    *pixas,
            l_int32  sorttype,
            l_int32  sortorder,
            NUMA   **pnaindex,
            l_int32  copyflag)
{
l_int32  i, n, x, y, w, h;
BOXA    *boxa;
NUMA    *na, *naindex;
PIXA    *pixad;

    PROCNAME("pixaBinSort");

    if (pnaindex) *pnaindex = NULL;
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && 
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_PERIMETER)
        return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL);

        /* Verify that the pixa and its boxa have the same count */
    if ((boxa = pixas->boxa) == NULL)   /* not owned; do not destroy */
        return (PIXA *)ERROR_PTR("boxa not found", procName, NULL);
    n = pixaGetCount(pixas);
    if (boxaGetCount(boxa) != n)
        return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL);

        /* Generate Numa of appropriate box dimensions */
    if ((na = numaCreate(n)) == NULL)
        return (PIXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_PERIMETER:
            numaAddNumber(na, w + h);
            break;
        default:
            L_WARNING("invalid sort type", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL)
        return (PIXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted pixa using sort index */
    if ((pixad = pixaSortByIndex(pixas, naindex, copyflag)) == NULL)
        return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return pixad;
}
Пример #20
0
/*!
 *  pixaSort()
 *
 *      Input:  pixas
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH,
 *                        L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION,
 *                        L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER,
 *                        L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<optional return> index of sorted order into
 *                        original array)
 *              copyflag (L_COPY, L_CLONE)
 *      Return: pixad (sorted version of pixas), or null on error
 *
 *  Notes:
 *      (1) This sorts based on the data in the boxa.  If the boxa
 *          count is not the same as the pixa count, this returns an error.
 *      (2) The copyflag refers to the pix and box copies that are
 *          inserted into the sorted pixa.  These are either L_COPY
 *          or L_CLONE.
 */
PIXA *
pixaSort(PIXA    *pixas,
         l_int32  sorttype,
         l_int32  sortorder,
         NUMA   **pnaindex,
         l_int32  copyflag)
{
l_int32  i, n, x, y, w, h;
BOXA    *boxa;
NUMA    *na, *naindex;
PIXA    *pixad;

    PROCNAME("pixaSort");

    if (pnaindex) *pnaindex = NULL;
    if (!pixas)
        return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL);
    if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && 
        sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT &&
        sorttype != L_SORT_BY_MIN_DIMENSION &&
        sorttype != L_SORT_BY_MAX_DIMENSION &&
        sorttype != L_SORT_BY_PERIMETER &&
        sorttype != L_SORT_BY_AREA &&
        sorttype != L_SORT_BY_ASPECT_RATIO)
        return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL);
    if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING)
        return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL);

    if ((boxa = pixas->boxa) == NULL)   /* not owned; do not destroy */
        return (PIXA *)ERROR_PTR("boxa not found", procName, NULL);
    n = pixaGetCount(pixas);
    if (boxaGetCount(boxa) != n)
        return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL);

        /* Use O(n) binsort if possible */
    if (n > MIN_COMPS_FOR_BIN_SORT &&
        ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) ||
         (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) ||
         (sorttype == L_SORT_BY_PERIMETER)))
        return pixaBinSort(pixas, sorttype, sortorder, pnaindex, copyflag);

        /* Build up numa of specific data */
    if ((na = numaCreate(n)) == NULL)
        return (PIXA *)ERROR_PTR("na not made", procName, NULL);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h);
        switch (sorttype)
        {
        case L_SORT_BY_X:
            numaAddNumber(na, x);
            break;
        case L_SORT_BY_Y:
            numaAddNumber(na, y);
            break;
        case L_SORT_BY_WIDTH:
            numaAddNumber(na, w);
            break;
        case L_SORT_BY_HEIGHT:
            numaAddNumber(na, h);
            break;
        case L_SORT_BY_MIN_DIMENSION:
            numaAddNumber(na, L_MIN(w, h));
            break;
        case L_SORT_BY_MAX_DIMENSION:
            numaAddNumber(na, L_MAX(w, h));
            break;
        case L_SORT_BY_PERIMETER:
            numaAddNumber(na, w + h);
            break;
        case L_SORT_BY_AREA:
            numaAddNumber(na, w * h);
            break;
        case L_SORT_BY_ASPECT_RATIO:
            numaAddNumber(na, (l_float32)w / (l_float32)h);
            break;
        default:
            L_WARNING("invalid sort type", procName);
        }
    }

        /* Get the sort index for data array */
    if ((naindex = numaGetSortIndex(na, sortorder)) == NULL)
        return (PIXA *)ERROR_PTR("naindex not made", procName, NULL);

        /* Build up sorted pixa using sort index */
    if ((pixad = pixaSortByIndex(pixas, naindex, copyflag)) == NULL)
        return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);

    if (pnaindex)
        *pnaindex = naindex;
    else
        numaDestroy(&naindex);
    numaDestroy(&na);
    return pixad;
}
Пример #21
0
/*!
 *  pixaaDisplay()
 *
 *      Input:  pixaa
 *              w, h (if set to 0, determines the size from the
 *                    b.b. of the components in pixaa)
 *      Return: pix, or null on error
 *
 *  Notes:
 *      (1) Each pix of the pixaa is displayed at the location given by
 *          its box, translated by the box of the containing pixa
 *          if it exists.
 */
PIX *
pixaaDisplay(PIXAA   *pixaa,
             l_int32  w,
             l_int32  h)
{
l_int32  i, j, n, nbox, na, d, wmax, hmax, x, y, xb, yb, wb, hb;
BOXA    *boxa1;  /* top-level boxa */
BOXA    *boxa;
PIX     *pixt, *pixd;
PIXA    *pixa;

    PROCNAME("pixaaDisplay");

    if (!pixaa)
        return (PIX *)ERROR_PTR("pixaa not defined", procName, NULL);
    
    n = pixaaGetCount(pixaa);
    if (n == 0)
        return (PIX *)ERROR_PTR("no components", procName, NULL);

        /* If w and h not input, determine the minimum size required
         * to contain the origin and all c.c. */
    boxa1 = pixaaGetBoxa(pixaa, L_CLONE);
    nbox = boxaGetCount(boxa1);
    if (w == 0 || h == 0) {
        if (nbox == n)
            boxaGetExtent(boxa1, &w, &h, NULL);
        else {  /* have to use the lower-level boxa for each pixa */
            wmax = hmax = 0;
            for (i = 0; i < n; i++) {
                pixa = pixaaGetPixa(pixaa, i, L_CLONE);
                boxa = pixaGetBoxa(pixa, L_CLONE);
                boxaGetExtent(boxa, &w, &h, NULL);
                wmax = L_MAX(wmax, w);
                hmax = L_MAX(hmax, h);
                pixaDestroy(&pixa);
                boxaDestroy(&boxa);
            }
            w = wmax;
            h = hmax;
        }
    }

        /* Get depth from first pix */
    pixa = pixaaGetPixa(pixaa, 0, L_CLONE);
    pixt = pixaGetPix(pixa, 0, L_CLONE);
    d = pixGetDepth(pixt);
    pixaDestroy(&pixa);
    pixDestroy(&pixt);

    if ((pixd = pixCreate(w, h, d)) == NULL)
        return (PIX *)ERROR_PTR("pixd not made", procName, NULL);
    
    x = y = 0;
    for (i = 0; i < n; i++) {
        pixa = pixaaGetPixa(pixaa, i, L_CLONE);
        if (nbox == n)
            boxaGetBoxGeometry(boxa1, i, &x, &y, NULL, NULL);
        na = pixaGetCount(pixa);
        for (j = 0; j < na; j++) {
            pixaGetBoxGeometry(pixa, j, &xb, &yb, &wb, &hb);
            pixt = pixaGetPix(pixa, j, L_CLONE);
            pixRasterop(pixd, x + xb, y + yb, wb, hb, PIX_PAINT, pixt, 0, 0);
            pixDestroy(&pixt);
        }
        pixaDestroy(&pixa);
    }
    boxaDestroy(&boxa1);

    return pixd;
}
Пример #22
0
main(int    argc,
     char **argv)
{
l_int32  i, n, w, h, success, display;
FILE    *fp;
BOXA    *boxa;
NUMA    *naindex, *naw, *nah, *naw_med, *nah_med;
PIX     *pixs, *pixt, *pixd;

    if (regTestSetup(argc, argv, &fp, &display, &success, NULL))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_10bin", 
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_10bin", 
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_30bin", 
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_30bin", 
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Give gnuplot time to write out the files */
#ifndef  _WIN32
    sleep(2);
#else
    Sleep(2000);
#endif  /* _WIN32 */

        /* Save as golden files, or check against them */
    regTestCheckFile(fp, argv, "/tmp/w_10bin.png", 0, &success);
    regTestCheckFile(fp, argv, "/tmp/h_10bin.png", 1, &success);
    regTestCheckFile(fp, argv, "/tmp/w_30bin.png", 2, &success);
    regTestCheckFile(fp, argv, "/tmp/h_30bin.png", 3, &success);

        /* Display results for debugging */
    pixt = pixRead("/tmp/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    regTestCleanup(argc, argv, fp, success, NULL);
    return 0;
}
Пример #23
0
// Finds image regions within the source pix (page image) and returns
// the image regions as a Boxa, Pixa pair, analgous to pixConnComp.
// The returned boxa, pixa may be NULL, meaning no images found.
// If not NULL, they must be destroyed by the caller.
void ImageFinder::FindImages(Pix* pix, Boxa** boxa, Pixa** pixa) {
  *boxa = NULL;
  *pixa = NULL;

#ifdef HAVE_LIBLEPT
  if (pixGetWidth(pix) < kMinImageFindSize ||
      pixGetHeight(pix) < kMinImageFindSize)
    return;  // Not worth looking at small images.
  // Reduce by factor 2.
  Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
  pixDisplayWrite(pixr, textord_tabfind_show_images);

  // Get the halftone mask directly from Leptonica.
  Pix *pixht2 = pixGenHalftoneMask(pixr, NULL, NULL,
                                   textord_tabfind_show_images);
  pixDestroy(&pixr);
  if (pixht2 == NULL)
    return;

  // Expand back up again.
  Pix *pixht = pixExpandReplicate(pixht2, 2);
  pixDisplayWrite(pixht, textord_tabfind_show_images);
  pixDestroy(&pixht2);

  // Fill to capture pixels near the mask edges that were missed
  Pix *pixt = pixSeedfillBinary(NULL, pixht, pix, 8);
  pixOr(pixht, pixht, pixt);
  pixDestroy(&pixt);

  // Eliminate lines and bars that may be joined to images.
  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
  pixDisplayWrite(pixfinemask, textord_tabfind_show_images);
  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
  pixDestroy(&pixreduced);
  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
  pixDestroy(&pixreduced2);
  pixDisplayWrite(pixcoarsemask, textord_tabfind_show_images);
  // Combine the coarse and fine image masks.
  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
  pixDestroy(&pixfinemask);
  // Dilate a bit to make sure we get everything.
  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
  pixDestroy(&pixcoarsemask);
  pixDisplayWrite(pixmask, textord_tabfind_show_images);
  // And the image mask with the line and bar remover.
  pixAnd(pixht, pixht, pixmask);
  pixDestroy(&pixmask);
  pixDisplayWrite(pixht, textord_tabfind_show_images);
  // Find the individual image regions in the mask image.
  *boxa = pixConnComp(pixht, pixa, 8);
  pixDestroy(&pixht);
  // Rectangularize the individual images. If a sharp edge in vertical and/or
  // horizontal occupancy can be found, it indicates a probably rectangular
  // image with unwanted bits merged on, so clip to the approximate rectangle.
  int npixes = pixaGetCount(*pixa);
  for (int i = 0; i < npixes; ++i) {
    int x_start, x_end, y_start, y_end;
    Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE);
    pixDisplayWrite(img_pix, textord_tabfind_show_images);
    if (pixNearlyRectangular(img_pix, kMinRectangularFraction,
                             kMaxRectangularFraction,
                             kMaxRectangularGradient,
                             &x_start, &y_start, &x_end, &y_end)) {
      // Add 1 to the size as a kludgy flag to indicate to the later stages
      // of processing that it is a clipped rectangular image .
      Pix* simple_pix = pixCreate(pixGetWidth(img_pix) + 1,
                                  pixGetHeight(img_pix), 1);
      pixDestroy(&img_pix);
      pixRasterop(simple_pix, x_start, y_start, x_end - x_start,
                  y_end - y_start, PIX_SET, NULL, 0, 0);
      // pixaReplacePix takes ownership of the simple_pix.
      pixaReplacePix(*pixa, i, simple_pix, NULL);
      img_pix = pixaGetPix(*pixa, i, L_CLONE);
    }
    // Subtract the pix from the correct location in the master image.
    l_int32 x, y, width, height;
    pixDisplayWrite(img_pix, textord_tabfind_show_images);
    boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
    pixRasterop(pix, x, y, width, height, PIX_NOT(PIX_SRC) & PIX_DST,
                img_pix, 0, 0);
    pixDestroy(&img_pix);
  }
#endif
}
Пример #24
0
/*!
 * \brief   boxaMakeSizeIndicator()
 *
 * \param[in]    boxa
 * \param[in]    width, height   threshold dimensions
 * \param[in]    type            L_SELECT_WIDTH, L_SELECT_HEIGHT,
 *                               L_SELECT_IF_EITHER, L_SELECT_IF_BOTH
 * \param[in]    relation        L_SELECT_IF_LT, L_SELECT_IF_GT,
 *                               L_SELECT_IF_LTE, L_SELECT_IF_GTE
 * \return  na indicator array, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The args specify constraints on the size of the
 *          components that are kept.
 *      (2) If the selection type is L_SELECT_WIDTH, the input
 *          height is ignored, and v.v.
 *      (3) To keep small components, use relation = L_SELECT_IF_LT or
 *          L_SELECT_IF_LTE.
 *          To keep large components, use relation = L_SELECT_IF_GT or
 *          L_SELECT_IF_GTE.
 * </pre>
 */
NUMA *
boxaMakeSizeIndicator(BOXA     *boxa,
                      l_int32   width,
                      l_int32   height,
                      l_int32   type,
                      l_int32   relation)
{
l_int32  i, n, w, h, ival;
NUMA    *na;

    PROCNAME("boxaMakeSizeIndicator");

    if (!boxa)
        return (NUMA *)ERROR_PTR("boxa not defined", procName, NULL);
    if ((n = boxaGetCount(boxa)) == 0)
        return (NUMA *)ERROR_PTR("boxa is empty", procName, NULL);
    if (type != L_SELECT_WIDTH && type != L_SELECT_HEIGHT &&
        type != L_SELECT_IF_EITHER && type != L_SELECT_IF_BOTH)
        return (NUMA *)ERROR_PTR("invalid type", procName, NULL);
    if (relation != L_SELECT_IF_LT && relation != L_SELECT_IF_GT &&
        relation != L_SELECT_IF_LTE && relation != L_SELECT_IF_GTE)
        return (NUMA *)ERROR_PTR("invalid relation", procName, NULL);

    na = numaCreate(n);
    for (i = 0; i < n; i++) {
        ival = 0;
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        switch (type)
        {
        case L_SELECT_WIDTH:
            if ((relation == L_SELECT_IF_LT && w < width) ||
                (relation == L_SELECT_IF_GT && w > width) ||
                (relation == L_SELECT_IF_LTE && w <= width) ||
                (relation == L_SELECT_IF_GTE && w >= width))
                ival = 1;
            break;
        case L_SELECT_HEIGHT:
            if ((relation == L_SELECT_IF_LT && h < height) ||
                (relation == L_SELECT_IF_GT && h > height) ||
                (relation == L_SELECT_IF_LTE && h <= height) ||
                (relation == L_SELECT_IF_GTE && h >= height))
                ival = 1;
            break;
        case L_SELECT_IF_EITHER:
            if (((relation == L_SELECT_IF_LT) && (w < width || h < height)) ||
                ((relation == L_SELECT_IF_GT) && (w > width || h > height)) ||
               ((relation == L_SELECT_IF_LTE) && (w <= width || h <= height)) ||
                ((relation == L_SELECT_IF_GTE) && (w >= width || h >= height)))
                    ival = 1;
            break;
        case L_SELECT_IF_BOTH:
            if (((relation == L_SELECT_IF_LT) && (w < width && h < height)) ||
                ((relation == L_SELECT_IF_GT) && (w > width && h > height)) ||
               ((relation == L_SELECT_IF_LTE) && (w <= width && h <= height)) ||
                ((relation == L_SELECT_IF_GTE) && (w >= width && h >= height)))
                    ival = 1;
            break;
        default:
            L_WARNING("can't get here!\n", procName);
            break;
        }
        numaAddNumber(na, ival);
    }

    return na;
}
Пример #25
0
int main(int    argc,
         char **argv)
{
l_int32       i, n, w, h;
BOXA         *boxa;
NUMA         *naindex, *naw, *nah, *naw_med, *nah_med;
PIX          *pixs, *pixt;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

        /* Generate arrays of word widths and heights */
    pixs = pixRead("feyn.tif");
    pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex);
    n = boxaGetCount(boxa);
    naw = numaCreate(0);
    nah = numaCreate(0);
    for (i = 0; i < n; i++) {
        boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h);
        numaAddNumber(naw, w);
        numaAddNumber(nah, h);
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Make the rank bin arrays of median values, with 10 bins */
    lept_rmfile("/tmp/lept/regout/w_10bin.png");  /* remove existing ones */
    lept_rmfile("/tmp/lept/regout/h_10bin.png");
    lept_rmfile("/tmp/lept/regout/w_30bin.png");
    lept_rmfile("/tmp/lept/regout/h_30bin.png");
    numaGetRankBinValues(naw, 10, NULL, &naw_med);
    numaGetRankBinValues(nah, 10, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_10bin",
                 "width vs rank bins (10)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_10bin",
                 "height vs rank bins (10)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Make the rank bin arrays of median values, with 30 bins */
    numaGetRankBinValues(naw, 30, NULL, &naw_med);
    numaGetRankBinValues(nah, 30, NULL, &nah_med);
    gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_30bin",
                 "width vs rank bins (30)");
    gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_30bin",
                 "height vs rank bins (30)");
    numaDestroy(&naw_med);
    numaDestroy(&nah_med);

        /* Save as golden files, or check against them */
    regTestCheckFile(rp, "/tmp/lept/regout/w_10bin.png");  /* 0 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_10bin.png");  /* 1 */
    regTestCheckFile(rp, "/tmp/lept/regout/w_30bin.png");  /* 2 */
    regTestCheckFile(rp, "/tmp/lept/regout/h_30bin.png");  /* 3 */

        /* Display results for debugging */
    pixt = pixRead("/tmp/lept/regout/w_10bin.png");
    pixDisplayWithTitle(pixt, 0, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_10bin.png");
    pixDisplayWithTitle(pixt, 650, 0, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/w_30bin.png");
    pixDisplayWithTitle(pixt, 0, 550, NULL, rp->display);
    pixDestroy(&pixt);
    pixt = pixRead("/tmp/lept/regout/h_30bin.png");
    pixDisplayWithTitle(pixt, 650, 550, NULL, rp->display);
    pixDestroy(&pixt);

    pixDestroy(&pixs);
    numaDestroy(&naw);
    numaDestroy(&nah);
    return regTestCleanup(rp);
}