 *  boxaCopy()
 *      Input:  boxa
 *              copyflag (L_COPY, L_CLONE, L_COPY_CLONE)
 *      Return: new boxa, or null on error
 *  Notes:
 *      (1) See pix.h for description of the copyflag.
 *      (2) The copy-clone makes a new boxa that holds clones of each box.
boxaCopy(BOXA    *boxa,
         l_int32  copyflag)
l_int32  i;
BOX     *boxc;
BOXA    *boxac;


    if (!boxa)
        return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);

    if (copyflag == L_CLONE) {
        return boxa;

    if (copyflag != L_COPY && copyflag != L_COPY_CLONE)
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);

    if ((boxac = boxaCreate(boxa->nalloc)) == NULL)
        return (BOXA *)ERROR_PTR("boxac not made", procName, NULL);
    for (i = 0; i < boxa->n; i++) {
        if (copyflag == L_COPY)
            boxc = boxaGetBox(boxa, i, L_COPY);
        else   /* copy-clone */
            boxc = boxaGetBox(boxa, i, L_CLONE);
        boxaAddBox(boxac, boxc, L_INSERT);
    return boxac;
예제 #2
 *  boxaMergeEvenOdd()
 *      Input:  boxae (boxes to go in even positions in merged boxa)
 *              boxao (boxes to go in odd positions in merged boxa)
 *      Return: boxad (merged), or null on error
 *  Notes:
 *      (1) Boxes are alternatingly selected from boxae and boxao.
 *          Both boxae and boxao are of the same size.
boxaMergeEvenOdd(BOXA  *boxae,
                 BOXA  *boxao)
l_int32  i, n;
BOX     *box;
BOXA    *boxad;


    if (!boxae || !boxao)
        return (BOXA *)ERROR_PTR("boxae and boxao not defined", procName, NULL);
    n = boxaGetCount(boxae);
    if (n != boxaGetCount(boxao))
        return (BOXA *)ERROR_PTR("boxa sizes differ", procName, NULL);

    boxad = boxaCreate(n);
    for (i = 0; i < n; i++) {
        if ((i & 1) == 0)
            box = boxaGetBox(boxae, i, L_COPY);
            box = boxaGetBox(boxao, i, L_COPY);
        boxaAddBox(boxad, box, L_INSERT);
    return boxad;
예제 #3
 * create_cube_box_word
 * Fill the given BoxWord with boxes from character bounding
 * boxes. The char_boxes have local coordinates w.r.t. the
 * word bounding box, i.e., the left-most character bbox of each word
 * has (0,0) left-top coord, but the BoxWord must be defined in page
 * coordinates.
bool Tesseract::create_cube_box_word(Boxa *char_boxes,
                                     int num_chars,
                                     TBOX word_box,
                                     BoxWord* box_word) {
  if (!box_word) {
    if (cube_debug_level > 0) {
      tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
    return false;

  // Find the x-coordinate of left-most char_box, which could be
  // nonzero if the word image was padded before recognition took place.
  int x_offset = -1;
  for (int i = 0; i < num_chars; ++i) {
    Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
    if (x_offset < 0 || char_box->x < x_offset) {
      x_offset = char_box->x;

  for (int i = 0; i < num_chars; ++i) {
    Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
    TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
    box_word->InsertBox(i, tbox);
  return true;
예제 #4
 *  boxaEqual()
 *      Input:  boxa1
 *              boxa2
 *              maxdist
 *              &naindex (<optional return> index array of correspondences
 *              &same (<return> 1 if equal; 0 otherwise)
 *      Return  0 if OK, 1 on error
 *  Notes:
 *      (1) The two boxa are the "same" if they contain the same
 *          boxes and each box is within @maxdist of its counterpart
 *          in their positions within the boxa.  This allows for
 *          small rearrangements.  Use 0 for maxdist if the boxa
 *          must be identical.
 *      (2) This applies only to geometry and ordering; refcounts
 *          are not considered.
 *      (3) @maxdist allows some latitude in the ordering of the boxes.
 *          For the boxa to be the "same", corresponding boxes must
 *          be within @maxdist of each other.  Note that for large
 *          @maxdist, we should use a hash function for efficiency.
 *      (4) naindex[i] gives the position of the box in boxa2 that
 *          corresponds to box i in boxa1.  It is only returned if the
 *          boxa are equal.
boxaEqual(BOXA     *boxa1,
          BOXA     *boxa2,
          l_int32   maxdist,
          NUMA    **pnaindex,
          l_int32  *psame)
l_int32   i, j, n, jstart, jend, found, samebox;
l_int32  *countarray;
BOX      *box1, *box2;
NUMA     *na;


    if (pnaindex) *pnaindex = NULL;
    if (!psame)
        return ERROR_INT("&same not defined", procName, 1);
    *psame = 0;
    if (!boxa1 || !boxa2)
        return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1);
    n = boxaGetCount(boxa1);
    if (n != boxaGetCount(boxa2))
        return 0;

    countarray = (l_int32 *)CALLOC(n, sizeof(l_int32));
    na = numaMakeConstant(0.0, n);

    for (i = 0; i < n; i++) {
        box1 = boxaGetBox(boxa1, i, L_CLONE);
        jstart = L_MAX(0, i - maxdist);
        jend = L_MIN(n-1, i + maxdist);
        found = FALSE;
        for (j = jstart; j <= jend; j++) {
            box2 = boxaGetBox(boxa2, j, L_CLONE);
            boxEqual(box1, box2, &samebox);
            if (samebox && countarray[j] == 0) {
                countarray[j] = 1;
                numaReplaceNumber(na, i, j);
                found = TRUE;
        if (!found) {
            return 0;

    *psame = 1;
    if (pnaindex)
        *pnaindex = na;
    return 0;
예제 #5
 *  boxaGetNearestToPt()
 *      Input:  boxa
 *              x, y  (point)
 *      Return  box (box with centroid closest to the given point [x,y]),
 *              or NULL if no boxes in boxa)
 *  Notes:
 *      (1) Uses euclidean distance between centroid and point.
boxaGetNearestToPt(BOXA    *boxa,
                   l_int32  x,
                   l_int32  y)
l_int32    i, n, minindex;
l_float32  delx, dely, dist, mindist, cx, cy;
BOX       *box;


    if (!boxa)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
    if ((n = boxaGetCount(boxa)) == 0)
        return (BOX *)ERROR_PTR("n = 0", procName, NULL);
    mindist = 1000000000.;
    minindex = 0;
    for (i = 0; i < n; i++) {
        box = boxaGetBox(boxa, i, L_CLONE);
        boxGetCenter(box, &cx, &cy);
        delx = (l_float32)(cx - x);
        dely = (l_float32)(cy - y);
        dist = delx * delx + dely * dely;
        if (dist < mindist) {
            minindex = i;
            mindist = dist;

    return boxaGetBox(boxa, minindex, L_COPY);
예제 #6
 *  boxaGetCoverage()
 *      Input:  boxa
 *              wc, hc (dimensions of overall clipping rectangle with UL
 *                      corner at (0, 0) that is covered by the boxes.
 *              exactflag (1 for guaranteeing an exact result; 0 for getting
 *                         an exact result only if the boxes do not overlap)
 *              &fract (<return> sum of box area as fraction of w * h)
 *      Return: 0 if OK, 1 on error
 *  Notes:
 *      (1) The boxes in boxa are clipped to the input rectangle.
 *      (2) * When @exactflag == 1, we generate a 1 bpp pix of size
 *            wc x hc, paint all the boxes black, and count the fg pixels.
 *            This can take 1 msec on a large page with many boxes.
 *          * When @exactflag == 0, we clip each box to the wc x hc region
 *            and sum the resulting areas.  This is faster.
 *          * The results are the same when none of the boxes overlap
 *            within the wc x hc region.
boxaGetCoverage(BOXA       *boxa,
                l_int32     wc,
                l_int32     hc,
                l_int32     exactflag,
                l_float32  *pfract)
l_int32  i, n, x, y, w, h, sum;
BOX     *box, *boxc;
PIX     *pixt;


    if (!pfract)
        return ERROR_INT("&fract not defined", procName, 1);
    *pfract = 0.0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    n = boxaGetCount(boxa);
    if (n == 0)
        return ERROR_INT("no boxes in boxa", procName, 1);

    if (exactflag == 0) {  /* quick and dirty */
        sum = 0;
        for (i = 0; i < n; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            if ((boxc = boxClipToRectangle(box, wc, hc)) != NULL) {
                boxGetGeometry(boxc, NULL, NULL, &w, &h);
                sum += w * h;
    else {  /* slower and exact */
        pixt = pixCreate(wc, hc, 1);
        for (i = 0; i < n; i++) {
            box = boxaGetBox(boxa, i, L_CLONE);
            boxGetGeometry(box, &x, &y, &w, &h);
            pixRasterop(pixt, x, y, w, h, PIX_SET, NULL, 0, 0);
        pixCountPixels(pixt, &sum, NULL);

    *pfract = (l_float32)sum / (l_float32)(wc * hc);
    return 0;
예제 #7
 *  boxaEncapsulateAligned()
 *      Input:  boxa
 *              num (number put into each boxa in the baa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxaa, or null on error
 *  Notes:
 *      (1) This puts @num boxes from the input @boxa into each of a
 *          set of boxa within an output boxaa.
 *      (2) This assumes that the boxes in @boxa are in sets of @num each.
boxaEncapsulateAligned(BOXA    *boxa,
                       l_int32  num,
                       l_int32  copyflag)
l_int32  i, j, n, nbaa, index;
BOX     *box;
BOXA    *boxat;
BOXAA   *baa;


    if (!boxa)
        return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaGetCount(boxa);
    nbaa = (n + num - 1) / num;
    if (n / num != nbaa)
        L_ERROR("inconsistent alignment: n / num not an integer", procName);
    baa = boxaaCreate(nbaa);
    for (i = 0, index = 0; i < nbaa; i++) {
        boxat = boxaCreate(num);
        for (j = 0; j < num; j++, index++) {
            box = boxaGetBox(boxa, index, copyflag);
            boxaAddBox(boxat, box, L_INSERT);
        boxaaAddBoxa(baa, boxat, L_INSERT);

    return baa;
예제 #8
 *  boxaClipToBox()
 *      Input:  boxas
 *              box (for clipping)
 *      Return  boxad (boxa with boxes in boxas clipped to box),
 *                     or null on error
 *  Notes:
 *      (1) All boxes in boxa not intersecting with box are removed, and
 *          the remaining boxes are clipped to box.
boxaClipToBox(BOXA  *boxas,
              BOX   *box)
l_int32  i, n;
BOX     *boxt, *boxo;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!box)
        return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
    if ((n = boxaGetCount(boxas)) == 0)
        return boxaCreate(1);  /* empty */

    boxad = boxaCreate(0);
    for (i = 0; i < n; i++) {
        boxt = boxaGetBox(boxas, i, L_CLONE);
        if ((boxo = boxOverlapRegion(box, boxt)) != NULL)
            boxaAddBox(boxad, boxo, L_INSERT);

    return boxad;
예제 #9
 *  boxaIntersectsBox()
 *      Input:  boxas
 *              box (for intersecting)
 *      Return  boxad (boxa with all boxes in boxas that intersect box),
 *                     or null on error
 *  Notes:
 *      (1) All boxes in boxa that intersect with box (i.e., are completely
 *          or partially contained in box) are retained.
boxaIntersectsBox(BOXA  *boxas,
                  BOX   *box)
l_int32  i, n, val;
BOX     *boxt;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!box)
        return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
    if ((n = boxaGetCount(boxas)) == 0)
        return boxaCreate(1);  /* empty */

    boxad = boxaCreate(0);
    for (i = 0; i < n; i++) {
        boxt = boxaGetBox(boxas, i, L_CLONE);
        boxIntersects(box, boxt, &val);
        if (val == 1)
            boxaAddBox(boxad, boxt, L_COPY);
        boxDestroy(&boxt);  /* destroy the clone */

    return boxad;
예제 #10
 * \brief   boxaConvertToPta()
 * \param[in]    boxa
 * \param[in]    ncorners     2 or 4 for the representation of each box
 * \return  pta with %ncorners points for each box in the boxa,
 *                   or NULL on error
 * <pre>
 * Notes:
 *      (1) If ncorners == 2, we select the UL and LR corners.
 *          Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
 *      (2) Other boxa --> pta functions are:
 *          * boxaExtractAsPta(): allows extraction of any dimension
 *            and/or side location, with each in a separate pta.
 *          * boxaExtractCorners(): extracts any of the four corners as a pta.
 * </pre>
boxaConvertToPta(BOXA    *boxa,
                 l_int32  ncorners)
l_int32  i, n;
BOX     *box;
PTA     *pta, *pta1;


    if (!boxa)
        return (PTA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (ncorners != 2 && ncorners != 4)
        return (PTA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL);

    n = boxaGetCount(boxa);
    if ((pta = ptaCreate(n)) == NULL)
        return (PTA *)ERROR_PTR("pta not made", procName, NULL);
    for (i = 0; i < n; i++) {
        box = boxaGetBox(boxa, i, L_COPY);
        pta1 = boxConvertToPta(box, ncorners);
        ptaJoin(pta, pta1, 0, -1);

    return pta;
 *  boxaWriteStream()
 *      Input: stream
 *             boxa
 *      Return: 0 if OK, 1 on error
boxaWriteStream(FILE  *fp,
                BOXA  *boxa)
l_int32  n, i;
BOX     *box;


    if (!fp)
        return ERROR_INT("stream not defined", procName, 1);
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    n = boxaGetCount(boxa);
    fprintf(fp, "\nBoxa Version %d\n", BOXA_VERSION_NUMBER);
    fprintf(fp, "Number of boxes = %d\n", n);
    for (i = 0; i < n; i++) {
        if ((box = boxaGetBox(boxa, i, L_CLONE)) == NULL)
            return ERROR_INT("box not found", procName, 1);
        fprintf(fp, "  Box[%d]: x = %d, y = %d, w = %d, h = %d\n",
                i, box->x, box->y, box->w, box->h);
    return 0;
예제 #12
 *  boxaSplitEvenOdd()
 *      Input:  boxa
 *              &boxae, &boxao (<return> save even and odd boxes in their
 *                 separate boxa, setting the other type to invalid boxes.)
 *      Return: 0 if OK, 1 on error
 *  Notes:
 *      (1) For example, boxae copies of the even boxes, in their original
 *          location, that are in boxa.  Invalid boxes are placed
 *          in the odd array locations.
boxaSplitEvenOdd(BOXA   *boxa,
                 BOXA  **pboxae,
                 BOXA  **pboxao)
l_int32  i, n;
BOX     *box, *boxt;


    if (!pboxae || !pboxao)
        return ERROR_INT("&boxae and &boxao not defined", procName, 1);
    *pboxae = *pboxao = NULL;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);

    n = boxaGetCount(boxa);
    *pboxae = boxaCreate(n);
    *pboxao = boxaCreate(n);
    for (i = 0; i < n; i++) {
        box = boxaGetBox(boxa, i, L_COPY);
        boxt = boxCreate(0, 0, 0, 0);  /* empty placeholder */
        if ((i & 1) == 0) {
            boxaAddBox(*pboxae, box, L_INSERT);
            boxaAddBox(*pboxao, boxt, L_INSERT);
        else {
            boxaAddBox(*pboxae, boxt, L_INSERT);
            boxaAddBox(*pboxao, box, L_INSERT);
    return 0;
 *  boxaGetBoxGeometry()
 *      Input:  boxa
 *              index  (to the index-th box)
 *              &x, &y, &w, &h (<optional return>; each can be null)
 *      Return: 0 if OK, 1 on error
boxaGetBoxGeometry(BOXA     *boxa,
                   l_int32   index,
                   l_int32  *px,
                   l_int32  *py,
                   l_int32  *pw,
                   l_int32  *ph)
BOX  *box;


    if (px) *px = 0;
    if (py) *py = 0;
    if (pw) *pw = 0;
    if (ph) *ph = 0;
    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (index < 0 || index >= boxa->n)
        return ERROR_INT("index not valid", procName, 1);

    if ((box = boxaGetBox(boxa, index, L_CLONE)) == NULL)
        return ERROR_INT("box not found!", procName, 1);
    boxGetGeometry(box, px, py, pw, ph);
    return 0;
예제 #14
 *  boxaTransform()
 *      Input:  boxa
 *              shiftx, shifty
 *              scalex, scaley
 *      Return: boxad, or null on error
 *  Notes:
 *      (1) This is a very simple function that first shifts, then scales.
boxaTransform(BOXA      *boxas,
              l_int32    shiftx,
              l_int32    shifty,
              l_float32  scalex,
              l_float32  scaley)
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
        boxaAddBox(boxad, boxd, L_INSERT);

    return boxad;
예제 #15
 *  boxaSortByIndex()
 *      Input:  boxas
 *              naindex (na that maps from the new boxa to the input boxa)
 *      Return: boxad (sorted), or null on error
boxaSortByIndex(BOXA  *boxas,
                NUMA  *naindex)
l_int32  i, n, index;
BOX     *box;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naindex)
        return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);

    n = boxaGetCount(boxas);
    boxad = boxaCreate(n);
    for (i = 0; i < n; i++) {
        numaGetIValue(naindex, i, &index);
        box = boxaGetBox(boxas, index, L_COPY);
        boxaAddBox(boxad, box, L_INSERT);

    return boxad;
예제 #16
 *  boxaRotateOrth()
 *      Input:  boxa
 *              w, h (of image in which the boxa is embedded)
 *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
 *                        all rotations are clockwise)
 *      Return: boxad, or null on error
 *  Notes:
 *      (1) See boxRotateOrth() for details.
boxaRotateOrth(BOXA    *boxas,
               l_int32  w,
               l_int32  h,
               l_int32  rotation)
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (rotation == 0)
        return boxaCopy(boxas, L_COPY);
    if (rotation < 1 || rotation > 3)
        return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);

    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL)
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        boxd = boxRotateOrth(boxs, w, h, rotation);
        boxaAddBox(boxad, boxd, L_INSERT);

    return boxad;
예제 #17
파일: quadtree.c 프로젝트: creatale/node-dv
 * \brief   pixQuadtreeMean()
 * \param[in]    pixs     8 bpp, no colormap
 * \param[in]    nlevels  in quadtree; max allowed depends on image size
 * \param[in]   *pix_ma   input mean accumulator; can be null
 * \param[out]  *pfpixa   mean values in quadtree
 * \return  0 if OK, 1 on error
 * <pre>
 * Notes:
 *      (1) The returned fpixa has %nlevels of fpix, each containing
 *          the mean values at its level.  Level 0 has a
 *          single value; level 1 has 4 values; level 2 has 16; etc.
 * </pre>
pixQuadtreeMean(PIX     *pixs,
                l_int32  nlevels,
                PIX     *pix_ma,
                FPIXA  **pfpixa)
l_int32    i, j, w, h, size, n;
l_float32  val;
BOX       *box;
BOXA      *boxa;
BOXAA     *baa;
FPIX      *fpix;
PIX       *pix_mac;


    if (!pfpixa)
        return ERROR_INT("&fpixa not defined", procName, 1);
    *pfpixa = NULL;
    if (!pixs || pixGetDepth(pixs) != 8)
        return ERROR_INT("pixs not defined or not 8 bpp", procName, 1);
    pixGetDimensions(pixs, &w, &h, NULL);
    if (nlevels > quadtreeMaxLevels(w, h))
        return ERROR_INT("nlevels too large for image", procName, 1);

    if (!pix_ma)
        pix_mac = pixBlockconvAccum(pixs);
        pix_mac = pixClone(pix_ma);
    if (!pix_mac)
        return ERROR_INT("pix_mac not made", procName, 1);

    if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) {
        return ERROR_INT("baa not made", procName, 1);

    *pfpixa = fpixaCreate(nlevels);
    for (i = 0; i < nlevels; i++) {
        boxa = boxaaGetBoxa(baa, i, L_CLONE);
        size = 1 << i;
        n = boxaGetCount(boxa);  /* n == size * size */
        fpix = fpixCreate(size, size);
        for (j = 0; j < n; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            pixMeanInRectangle(pixs, box, pix_mac, &val);
            fpixSetPixel(fpix, j % size, j / size, val);
        fpixaAddFPix(*pfpixa, fpix, L_INSERT);

    return 0;
예제 #18
// Loads the image at `filename`, converts it to 8-bit grayscale, then asks
// the Tesseract API object `m_tess` for block-level components and
// recognizes the text of each one, logging its geometry and confidence.
// NOTE(review): this excerpt looks truncated -- several `if` statements have
// no visible body, the step that passes the image to `m_tess` is not shown,
// and `#if 1` has no visible `#endif`. Confirm against the full source.
void tesser::ocrThread(QString filename)
    char *text;
    QImage image(filename);

    // NOTE(review): the statement guarded by this check is not visible here.
    if (image.isNull())

    qDebug() << image;

    QImage g=image.convertToFormat(QImage::Format_Grayscale8);
    qDebug() << g;
    // NOTE(review): the statement guarded by this check is not visible here.
    if (g.isNull())




#if 1
    // Components are requested at RIL_BLOCK level, although the log message
    // below calls them "Textline" components.
    Boxa* boxes = m_tess->GetComponentImages(tesseract::RIL_BLOCK, true, NULL, NULL);
    qDebug() << "Textline image components found: " << boxes->n;

    for (int i = 0; i < boxes->n; i++) {
        // L_CLONE returns a handle to the stored box; no matching
        // boxDestroy() is visible in this excerpt.
        BOX* box = boxaGetBox(boxes, i, L_CLONE);
        // Restrict recognition to this component's rectangle.
        m_tess->SetRectangle(box->x, box->y, box->w, box->h);
        text = m_tess->GetUTF8Text();
        int conf = m_tess->MeanTextConf();
        fprintf(stderr, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                       i, box->x, box->y, box->w, box->h, conf, text);
        // NOTE(review): as shown, the "Confidence under" message is printed
        // when conf is ABOVE m_min_conf; an accept branch and an `else` were
        // presumably lost here -- verify.
        if (conf>m_min_conf)
            qDebug() << "Confidence under " << m_min_conf << " skipping";
    qDebug() << m_tess->MeanTextConf();


    qDebug() << m_text;

예제 #19
파일: croptext.c 프로젝트: chewi/leptonica
int main(int    argc,
         char **argv)
char        *dirin, *dirout, *infile, *outfile, *tail;
l_int32      i, nfiles, border, x, y, w, h, xb, yb, wb, hb;
BOX         *box1, *box2;
BOXA        *boxa1, *boxa2;
PIX         *pixs, *pixt1, *pixd;
SARRAY      *safiles;
static char  mainName[] = "croptext";

    if (argc != 4)
        return ERROR_INT("Syntax: croptext dirin border dirout", mainName, 1);
    dirin = argv[1];
    border = atoi(argv[2]);
    dirout = argv[3];

    safiles = getSortedPathnamesInDirectory(dirin, NULL, 0, 0);
    nfiles = sarrayGetCount(safiles);

    for (i = 0; i < nfiles; i++) {
        infile = sarrayGetString(safiles, i, L_NOCOPY);
        splitPathAtDirectory(infile, NULL, &tail);
        outfile = genPathname(dirout, tail);
        pixs = pixRead(infile);
        pixt1 = pixMorphSequence(pixs, "r11 + c10.40 + o5.5 + x4", 0);
        boxa1 = pixConnComp(pixt1, NULL, 8);
        if (boxaGetCount(boxa1) == 0) {
            fprintf(stderr, "Warning: no components on page %s\n", tail);
        boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL);
        box1 = boxaGetBox(boxa2, 0, L_CLONE);
        boxGetGeometry(box1, &x, &y, &w, &h);
        xb = L_MAX(0, x - border);
        yb = L_MAX(0, y - border);
        wb = w + 2 * border;
        hb = h + 2 * border;
        box2 = boxCreate(xb, yb, wb, hb);
        pixd = pixClipRectangle(pixs, box2, NULL);
        pixWrite(outfile, pixd, IFF_TIFF_G4);


    return 0;
// This method returns the computed mode-height of blobs in the pix.
// It also prunes very small blobs from calculation: a blob contributes its
// height only if its height or its width is at least 3 pixels.
int ShiroRekhaSplitter::GetModeHeight(Pix* pix) {
  Boxa* boxa = pixConnComp(pix, NULL, 8);
  STATS heights(0, pixGetHeight(pix));
  const int num_boxes = boxaGetCount(boxa);
  for (int i = 0; i < num_boxes; ++i) {
    Box* box = boxaGetBox(boxa, i, L_CLONE);
    if (box->h >= 3 || box->w >= 3) {
      heights.add(box->h, 1);
    }
    boxDestroy(&box);  // release the clone handle
  }
  boxaDestroy(&boxa);  // the component boxes are owned by this function
  return heights.mode();
}
예제 #21
/*
 * Renders every box of boxa onto pixt as a hashed box.  The color and the
 * hash orientation (i % 4) are derived from the index i, so different
 * values of i give visually distinct renderings.  Always returns 0.
 */
l_int32 renderTransformedBoxa(PIX *pixt, BOXA *boxa, l_int32 i) {
    l_int32 j, n, rval, gval, bval;
    BOX *box;
    n = boxaGetCount(boxa);
    /* Color components computed from i, each reduced modulo 256. */
    rval = (1413 * i) % 256;
    gval = (4917 * i) % 256;
    bval = (7341 * i) % 256;
    for (j = 0; j < n; j++) {
        box = boxaGetBox(boxa, j, L_CLONE);
        pixRenderHashBoxArb(pixt, box, 10, 3, i % 4, 1, rval, gval, bval);
        boxDestroy(&box);  /* release the clone */
    }
    return 0;
}
예제 #22
 *  pixaCreateFromBoxa()
 *      Input:  pixs
 *              boxa
 *              &cropwarn (<optional return> TRUE if the boxa extent
 *                         is larger than pixs.
 *      Return: pixad, or null on error
 *  Notes:
 *      (1) This simply extracts from pixs the region corresponding to each
 *          box in the boxa.
 *      (2) The 3rd arg is optional.  If the extent of the boxa exceeds the
 *          size of the pixa, so that some boxes are either clipped
 *          or entirely outside the pix, a warning is returned as TRUE.
 *      (3) pixad will have only the properly clipped elements, and
 *          the internal boxa will be correct.
pixaCreateFromBoxa(PIX      *pixs,
                   BOXA     *boxa,
                   l_int32  *pcropwarn)
l_int32  i, n, w, h, wbox, hbox, cropwarn;
BOX     *box, *boxc;
PIX     *pixd;
PIXA    *pixad;


    if (!pixs)
        return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (!boxa)
        return (PIXA *)ERROR_PTR("boxa not defined", procName, NULL);

    n = boxaGetCount(boxa);
    if ((pixad = pixaCreate(n)) == NULL)
        return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);

    boxaGetExtent(boxa, &wbox, &hbox, NULL);
    pixGetDimensions(pixs, &w, &h, NULL);
    cropwarn = FALSE;
    if (wbox > w || hbox > h)
        cropwarn = TRUE;
    if (pcropwarn)
        *pcropwarn = cropwarn;

    for (i = 0; i < n; i++) {
        box = boxaGetBox(boxa, i, L_COPY);
        if (cropwarn) {  /* if box is outside pixs, pixd is NULL */
            pixd = pixClipRectangle(pixs, box, &boxc);  /* may be NULL */
            if (pixd) {
                pixaAddPix(pixad, pixd, L_INSERT);
                pixaAddBox(pixad, boxc, L_INSERT);
        else {
            pixd = pixClipRectangle(pixs, box, NULL);
            pixaAddPix(pixad, pixd, L_INSERT);
            pixaAddBox(pixad, box, L_INSERT);

    return pixad;
예제 #23
 *  boxaaFlattenToBoxa()
 *      Input:  boxaa
 *              &naindex  (<optional return> the boxa index in the boxaa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxa, or null on error
 *  Notes:
 *      (1) This 'flattens' the boxaa to a boxa, taking the boxes in
 *          order in the first boxa, then the second, etc.
 *      (2) If a boxa is empty, we generate an invalid, placeholder box
 *          of zero size.  This is useful when converting from a boxaa
 *          where each boxa has either 0 or 1 boxes, and it is necessary
 *          to maintain a 1:1 correspondence between the initial
 *          boxa array and the resulting box array.
 *      (3) If &naindex is defined, we generate a Numa that gives, for
 *          each box in the boxaa, the index of the boxa to which it belongs.
boxaaFlattenToBoxa(BOXAA   *baa,
                   NUMA   **pnaindex,
                   l_int32  copyflag)
l_int32  i, j, m, n;
BOXA    *boxa, *boxat;
BOX     *box;
NUMA    *naindex;


    if (pnaindex) *pnaindex = NULL;
    if (!baa)
        return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
    if (pnaindex) {
        naindex = numaCreate(0);
        *pnaindex = naindex;

    n = boxaaGetCount(baa);
    boxa = boxaCreate(n);
    for (i = 0; i < n; i++) {
        boxat = boxaaGetBoxa(baa, i, L_CLONE);
        m = boxaGetCount(boxat);
        if (m == 0) {  /* placeholder box */
            box = boxCreate(0, 0, 0, 0);
            boxaAddBox(boxa, box, L_INSERT);
            if (pnaindex)
                numaAddNumber(naindex, i);  /* save 'row' number */
        else {
            for (j = 0; j < m; j++) {
                box = boxaGetBox(boxat, j, copyflag);
                boxaAddBox(boxa, box, L_INSERT);
                if (pnaindex)
                    numaAddNumber(naindex, i);  /* save 'row' number */

    return boxa;
예제 #24
/* static */
void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter,
                          int start_box, int end_box, vector<BoxChar*>* boxes) {
  Boxa* orig = boxaCreate(0);
  for (int i = start_box; i < end_box; ++i) {
    BOX* box = (*boxes)[i]->box_;
    if (box) boxaAddBox(orig, box, L_CLONE);
  Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation);
  for (int i = start_box, box_ind = 0; i < end_box; ++i) {
    if ((*boxes)[i]->box_) {
      (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE);
예제 #25
 *  boxaaGetBox()
 *      Input:  baa
 *              iboxa  (index into the boxa array in the boxaa)
 *              ibox  (index into the box array in the boxa)
 *              accessflag   (L_COPY or L_CLONE)
 *      Return: box, or null on error
boxaaGetBox(BOXAA   *baa,
            l_int32  iboxa,
            l_int32  ibox,
            l_int32  accessflag)
BOX   *box;
BOXA  *boxa;


    if ((boxa = boxaaGetBoxa(baa, iboxa, L_CLONE)) == NULL)
        return (BOX *)ERROR_PTR("boxa not retrieved", procName, NULL);
    if ((box = boxaGetBox(boxa, ibox, accessflag)) == NULL)
        L_ERROR("box not retrieved", procName);
    return box;
예제 #26
// Helper erases false-positive line segments from the input/output line_pix.
// 1. Since thick lines shouldn't really break up, we can eliminate some false
//    positives by marking segments that are at least kMinThickLineWidth
//    thickness, yet have a length less than min_thick_length.
// 2. Lines that don't have at least 2 intersections with other lines and have
//    a lot of neighbouring non-lines are probably not lines (perhaps arabic
//    or Hindi words, or underlines.)
// Bad line components are erased from line_pix.
// Returns the number of remaining connected components.
static int FilterFalsePositives(int resolution, Pix* nonline_pix,
                                Pix* intersection_pix, Pix* line_pix) {
  int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
  Pixa* pixa = NULL;
  Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
  // Iterate over the boxes to remove false positives.
  int nboxes = boxaGetCount(boxa);
  int remaining_boxes = nboxes;
  for (int i = 0; i < nboxes; ++i) {
    Box* box = boxaGetBox(boxa, i, L_CLONE);
    l_int32 x, y, box_width, box_height;
    boxGetGeometry(box, &x, &y, &box_width, &box_height);
    Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
    int max_width = MaxStrokeWidth(comp_pix);
    bool bad_line = false;
    // If the length is too short to stand-alone as a line, and the box width
    // is thick enough, and the stroke width is thick enough it is bad.
    if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
        box_width < min_thick_length && box_height < min_thick_length &&
        max_width > kMinThickLineWidth) {
      // Too thick for the length.
      bad_line = true;
    if (!bad_line &&
        (intersection_pix == NULL ||
        NumTouchingIntersections(box, intersection_pix) < 2)) {
      // Test non-line density near the line.
      int nonline_count = CountPixelsAdjacentToLine(max_width, box,
      if (nonline_count > box_height * box_width * kMaxNonLineDensity)
        bad_line = true;
    if (bad_line) {
      // Not a good line.
      pixClearInRect(line_pix, box);
  return remaining_boxes;
 * \brief   boxaExtractSortedPattern()
 * \param[in]    boxa typ. of word bounding boxes, in textline order
 * \param[in]    na   index of textline for each box in boxa
 * \return  naa NUMAA, where each numa represents one textline,
 *                   or NULL on error
 * <pre>
 * Notes:
 *      (1) The input is expected to come from pixGetWordBoxesInTextlines().
 *      (2) Each numa in the output consists of an average y coordinate
 *          of the first box in the textline, followed by pairs of
 *          x coordinates representing the left and right edges of each
 *          of the boxes in the textline.
 * </pre>
boxaExtractSortedPattern(BOXA  *boxa,
                         NUMA  *na)
l_int32  index, nbox, row, prevrow, x, y, w, h;
BOX     *box;
NUMA    *nad;
NUMAA   *naa;


    if (!boxa)
        return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (!na)
        return (NUMAA *)ERROR_PTR("na not defined", procName, NULL);

    naa = numaaCreate(0);
    nbox = boxaGetCount(boxa);
    if (nbox == 0)
        return naa;

    prevrow = -1;
    for (index = 0; index < nbox; index++) {
        box = boxaGetBox(boxa, index, L_CLONE);
        numaGetIValue(na, index, &row);
        if (row > prevrow) {
            if (index > 0)
                numaaAddNuma(naa, nad, L_INSERT);
            nad = numaCreate(0);
            prevrow = row;
            boxGetGeometry(box, NULL, &y, NULL, &h);
            numaAddNumber(nad, y + h / 2);
        boxGetGeometry(box, &x, NULL, &w, NULL);
        numaAddNumber(nad, x);
        numaAddNumber(nad, x + w - 1);
    numaaAddNuma(naa, nad, L_INSERT);

    return naa;
예제 #28
 *  boxaGetValidBox()
 *      Input:  boxa
 *              index  (to the index-th box)
 *              accessflag  (L_COPY or L_CLONE)
 *      Return: box, or null if box is not valid or on error
 *  Notes:
 *      (1) This returns NULL for an invalid box in a boxa.
 *          For a box to be valid, both the width and height must be > 0.
 *      (2) We allow invalid boxes, with w = 0 or h = 0, as placeholders
 *          in boxa for which the index of the box in the boxa is important.
 *          This is an atypical situation; usually you want to put only
 *          valid boxes in a boxa.
boxaGetValidBox(BOXA    *boxa,
                l_int32  index,
                l_int32  accessflag)
l_int32  w, h;
BOX     *box;


    if (!boxa)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);

    if ((box = boxaGetBox(boxa, index, accessflag)) == NULL)
        return (BOX *)ERROR_PTR("box not returned", procName, NULL);
    boxGetGeometry(box, NULL, NULL, &w, &h);
    if (w <= 0 || h <= 0)  /* not valid, but not necessarily an error */
    return box;
예제 #29
 *  boxaSort2dByIndex()
 *      Input:  boxas
 *              naa (numaa that maps from the new baa to the input boxa)
 *      Return: baa (sorted boxaa), or null on error
boxaSort2dByIndex(BOXA   *boxas,
                  NUMAA  *naa)
l_int32  ntot, boxtot, i, j, n, nn, index;
BOX     *box;
BOXA    *boxa;
BOXAA   *baa;
NUMA    *na;


    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naa)
        return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);

        /* Check counts */
    ntot = numaaGetNumberCount(naa);
    boxtot = boxaGetCount(boxas);
    if (ntot != boxtot)
        return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);

    n = numaaGetCount(naa);
    baa = boxaaCreate(n);
    for (i = 0; i < n; i++) {
        na = numaaGetNuma(naa, i, L_CLONE);
        nn = numaGetCount(na);
        boxa = boxaCreate(nn);
        for (j = 0; j < nn; j++) {
            numaGetIValue(na, i, &index);
            box = boxaGetBox(boxas, index, L_COPY);
            boxaAddBox(boxa, box, L_INSERT);
        boxaaAddBoxa(baa, boxa, L_INSERT);

    return baa;
예제 #30
 * \brief   boxaSelectRange()
 * \param[in]    boxas
 * \param[in]    first      use 0 to select from the beginning
 * \param[in]    last       use -1 to select to the end
 * \param[in]    copyflag   L_COPY, L_CLONE
 * \return  boxad, or NULL on error
 * <pre>
 * Notes:
 *      (1) The copyflag specifies what we do with each box from boxas.
 *          Specifically, L_CLONE inserts a clone into boxad of each
 *          selected box from boxas.
 * </pre>
boxaSelectRange(BOXA    *boxas,
                l_int32  first,
                l_int32  last,
                l_int32  copyflag)
l_int32  n, nbox, i;
BOX     *box;
BOXA    *boxad;


    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
    if ((n = boxaGetCount(boxas)) == 0) {
        L_WARNING("boxas is empty\n", procName);
        return boxaCopy(boxas, copyflag);
    first = L_MAX(0, first);
    if (last < 0) last = n - 1;
    if (first >= n)
        return (BOXA *)ERROR_PTR("invalid first", procName, NULL);
    if (last >= n) {
        L_WARNING("last = %d is beyond max index = %d; adjusting\n",
                  procName, last, n - 1);
        last = n - 1;
    if (first > last)
        return (BOXA *)ERROR_PTR("first > last", procName, NULL);

    nbox = last - first + 1;
    boxad = boxaCreate(nbox);
    for (i = first; i <= last; i++) {
        box = boxaGetBox(boxas, i, copyflag);
        boxaAddBox(boxad, box, L_INSERT);
    return boxad;