/*!
 *  boxaCopy()
 *
 *      Input:  boxa
 *              copyflag (L_COPY, L_CLONE, L_COPY_CLONE)
 *      Return: new boxa, or null on error
 *
 *  Notes:
 *      (1) L_CLONE increments the refcount of the input boxa and returns
 *          that same handle.
 *      (2) L_COPY returns a new boxa that holds a copy of each box.
 *      (3) L_COPY_CLONE returns a new boxa that holds a clone of each box.
 *      (4) The new boxa is created with the same allocation size (nalloc)
 *          as the input.
 */
BOXA *
boxaCopy(BOXA    *boxa,
         l_int32  copyflag)
{
l_int32  idx, count, access;
BOX     *elem;
BOXA    *result;

    PROCNAME("boxaCopy");

    if (boxa == NULL)
        return (BOXA *)ERROR_PTR("boxa not defined", procName, NULL);

        /* A clone is just another reference to the same boxa */
    if (copyflag == L_CLONE) {
        boxa->refcount++;
        return boxa;
    }

    if (!(copyflag == L_COPY || copyflag == L_COPY_CLONE))
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);

    result = boxaCreate(boxa->nalloc);
    if (result == NULL)
        return (BOXA *)ERROR_PTR("boxac not made", procName, NULL);

        /* Deep copy for L_COPY; clone each box for L_COPY_CLONE */
    access = (copyflag == L_COPY) ? L_COPY : L_CLONE;
    count = boxa->n;
    for (idx = 0; idx < count; idx++) {
        elem = boxaGetBox(boxa, idx, access);
        boxaAddBox(result, elem, L_INSERT);
    }
    return result;
}
/*!
 *  boxaMergeEvenOdd()
 *
 *      Input:  boxae (boxes to go in even positions in merged boxa)
 *              boxao (boxes to go in odd positions in merged boxa)
 *      Return: boxad (merged), or null on error
 *
 *  Notes:
 *      (1) Position i of the result is a copy of box i from boxae when
 *          i is even, and of box i from boxao when i is odd.
 *      (2) boxae and boxao must have the same number of boxes.
 */
BOXA *
boxaMergeEvenOdd(BOXA  *boxae,
                 BOXA  *boxao)
{
l_int32  pos, count;
BOX     *picked;
BOXA    *source, *boxad;

    PROCNAME("boxaMergeEvenOdd");

    if (boxae == NULL || boxao == NULL)
        return (BOXA *)ERROR_PTR("boxae and boxao not defined", procName, NULL);
    count = boxaGetCount(boxae);
    if (boxaGetCount(boxao) != count)
        return (BOXA *)ERROR_PTR("boxa sizes differ", procName, NULL);

    boxad = boxaCreate(count);
    for (pos = 0; pos < count; pos++) {
        source = (pos % 2 == 0) ? boxae : boxao;
        picked = boxaGetBox(source, pos, L_COPY);
        boxaAddBox(boxad, picked, L_INSERT);
    }
    return boxad;
}
/********************************************************************** * create_cube_box_word * * Fill the given BoxWord with boxes from character bounding * boxes. The char_boxes have local coordinates w.r.t. the * word bounding box, i.e., the left-most character bbox of each word * has (0,0) left-top coord, but the BoxWord must be defined in page * coordinates. **********************************************************************/ bool Tesseract::create_cube_box_word(Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord* box_word) { if (!box_word) { if (cube_debug_level > 0) { tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n"); } return false; } // Find the x-coordinate of left-most char_box, which could be // nonzero if the word image was padded before recognition took place. int x_offset = -1; for (int i = 0; i < num_chars; ++i) { Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); if (x_offset < 0 || char_box->x < x_offset) { x_offset = char_box->x; } boxDestroy(&char_box); } for (int i = 0; i < num_chars; ++i) { Box* char_box = boxaGetBox(char_boxes, i, L_CLONE); TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset); boxDestroy(&char_box); box_word->InsertBox(i, tbox); } return true; }
/*! * boxaEqual() * * Input: boxa1 * boxa2 * maxdist * &naindex (<optional return> index array of correspondences * &same (<return> 1 if equal; 0 otherwise) * Return 0 if OK, 1 on error * * Notes: * (1) The two boxa are the "same" if they contain the same * boxes and each box is within @maxdist of its counterpart * in their positions within the boxa. This allows for * small rearrangements. Use 0 for maxdist if the boxa * must be identical. * (2) This applies only to geometry and ordering; refcounts * are not considered. * (3) @maxdist allows some latitude in the ordering of the boxes. * For the boxa to be the "same", corresponding boxes must * be within @maxdist of each other. Note that for large * @maxdist, we should use a hash function for efficiency. * (4) naindex[i] gives the position of the box in boxa2 that * corresponds to box i in boxa1. It is only returned if the * boxa are equal. */ l_int32 boxaEqual(BOXA *boxa1, BOXA *boxa2, l_int32 maxdist, NUMA **pnaindex, l_int32 *psame) { l_int32 i, j, n, jstart, jend, found, samebox; l_int32 *countarray; BOX *box1, *box2; NUMA *na; PROCNAME("boxaEqual"); if (pnaindex) *pnaindex = NULL; if (!psame) return ERROR_INT("&same not defined", procName, 1); *psame = 0; if (!boxa1 || !boxa2) return ERROR_INT("boxa1 and boxa2 not both defined", procName, 1); n = boxaGetCount(boxa1); if (n != boxaGetCount(boxa2)) return 0; countarray = (l_int32 *)CALLOC(n, sizeof(l_int32)); na = numaMakeConstant(0.0, n); for (i = 0; i < n; i++) { box1 = boxaGetBox(boxa1, i, L_CLONE); jstart = L_MAX(0, i - maxdist); jend = L_MIN(n-1, i + maxdist); found = FALSE; for (j = jstart; j <= jend; j++) { box2 = boxaGetBox(boxa2, j, L_CLONE); boxEqual(box1, box2, &samebox); if (samebox && countarray[j] == 0) { countarray[j] = 1; numaReplaceNumber(na, i, j); found = TRUE; boxDestroy(&box2); break; } boxDestroy(&box2); } boxDestroy(&box1); if (!found) { numaDestroy(&na); FREE(countarray); return 0; } } *psame = 1; if (pnaindex) *pnaindex = na; else 
numaDestroy(&na); FREE(countarray); return 0; }
/*!
 *  boxaGetNearestToPt()
 *
 *      Input:  boxa
 *              x, y  (point)
 *      Return: box (copy of the box with centroid closest to the given
 *                   point [x,y]), or NULL if there are no boxes in boxa
 *                   or on error
 *
 *  Notes:
 *      (1) Uses the squared euclidean distance between the box center
 *          (from boxGetCenter()) and the point.
 *      (2) The first box always seeds the minimum, so the result is
 *          correct no matter how far the boxes are from the point.
 *          On ties, the box with the lowest index is chosen.
 *      (3) The returned box is obtained with L_COPY.
 */
BOX *
boxaGetNearestToPt(BOXA    *boxa,
                   l_int32  x,
                   l_int32  y)
{
l_int32    i, n, minindex;
l_float32  delx, dely, dist, mindist, cx, cy;
BOX       *box;

    PROCNAME("boxaGetNearestToPt");

    if (!boxa)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);
    if ((n = boxaGetCount(boxa)) == 0)
        return (BOX *)ERROR_PTR("n = 0", procName, NULL);

    mindist = 0.0;
    minindex = 0;
    for (i = 0; i < n; i++) {
        box = boxaGetBox(boxa, i, L_CLONE);
        boxGetCenter(box, &cx, &cy);
        delx = (l_float32)(cx - x);
        dely = (l_float32)(cy - y);
        dist = delx * delx + dely * dely;
            /* No fixed sentinel: box 0 initializes the running minimum */
        if (i == 0 || dist < mindist) {
            minindex = i;
            mindist = dist;
        }
        boxDestroy(&box);
    }

    return boxaGetBox(boxa, minindex, L_COPY);
}
/*! * boxaGetCoverage() * * Input: boxa * wc, hc (dimensions of overall clipping rectangle with UL * corner at (0, 0) that is covered by the boxes. * exactflag (1 for guaranteeing an exact result; 0 for getting * an exact result only if the boxes do not overlap) * &fract (<return> sum of box area as fraction of w * h) * Return: 0 if OK, 1 on error * * Notes: * (1) The boxes in boxa are clipped to the input rectangle. * (2) * When @exactflag == 1, we generate a 1 bpp pix of size * wc x hc, paint all the boxes black, and count the fg pixels. * This can take 1 msec on a large page with many boxes. * * When @exactflag == 0, we clip each box to the wc x hc region * and sum the resulting areas. This is faster. * * The results are the same when none of the boxes overlap * within the wc x hc region. */ l_int32 boxaGetCoverage(BOXA *boxa, l_int32 wc, l_int32 hc, l_int32 exactflag, l_float32 *pfract) { l_int32 i, n, x, y, w, h, sum; BOX *box, *boxc; PIX *pixt; PROCNAME("boxaGetCoverage"); if (!pfract) return ERROR_INT("&fract not defined", procName, 1); *pfract = 0.0; if (!boxa) return ERROR_INT("boxa not defined", procName, 1); n = boxaGetCount(boxa); if (n == 0) return ERROR_INT("no boxes in boxa", procName, 1); if (exactflag == 0) { /* quick and dirty */ sum = 0; for (i = 0; i < n; i++) { box = boxaGetBox(boxa, i, L_CLONE); if ((boxc = boxClipToRectangle(box, wc, hc)) != NULL) { boxGetGeometry(boxc, NULL, NULL, &w, &h); sum += w * h; boxDestroy(&boxc); } boxDestroy(&box); } } else { /* slower and exact */ pixt = pixCreate(wc, hc, 1); for (i = 0; i < n; i++) { box = boxaGetBox(boxa, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); pixRasterop(pixt, x, y, w, h, PIX_SET, NULL, 0, 0); boxDestroy(&box); } pixCountPixels(pixt, &sum, NULL); pixDestroy(&pixt); } *pfract = (l_float32)sum / (l_float32)(wc * hc); return 0; }
/*!
 *  boxaEncapsulateAligned()
 *
 *      Input:  boxa
 *              num (number put into each boxa in the baa; must be > 0)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxaa, or null on error
 *
 *  Notes:
 *      (1) This puts @num boxes from the input @boxa into each of a
 *          set of boxa within an output boxaa.
 *      (2) This assumes that the boxes in @boxa are in sets of @num each.
 *          If @num does not divide the number of boxes, an error message
 *          is issued and the last boxa receives only the remaining boxes.
 */
BOXAA *
boxaEncapsulateAligned(BOXA    *boxa,
                       l_int32  num,
                       l_int32  copyflag)
{
l_int32  i, j, n, nbaa, index;
BOX     *box;
BOXA    *boxat;
BOXAA   *baa;

    PROCNAME("boxaEncapsulateAligned");

    if (!boxa)
        return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL);
        /* num is used as a divisor below */
    if (num <= 0)
        return (BOXAA *)ERROR_PTR("num must be > 0", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaGetCount(boxa);
    nbaa = (n + num - 1) / num;  /* round up so no box is dropped */
    if (n / num != nbaa)
        L_ERROR("inconsistent alignment: n / num not an integer", procName);
    baa = boxaaCreate(nbaa);
    for (i = 0, index = 0; i < nbaa; i++) {
        boxat = boxaCreate(num);
            /* Stop at the last box; the final set may be short */
        for (j = 0; j < num && index < n; j++, index++) {
            box = boxaGetBox(boxa, index, copyflag);
            boxaAddBox(boxat, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxat, L_INSERT);
    }

    return baa;
}
/*!
 *  boxaClipToBox()
 *
 *      Input:  boxas
 *              box (for clipping)
 *      Return: boxad (boxa with boxes in boxas clipped to box),
 *                     or null on error
 *
 *  Notes:
 *      (1) For each box in boxas, the overlap region with @box is
 *          computed with boxOverlapRegion().  Boxes for which no
 *          overlap region is returned are left out of boxad.
 *      (2) An empty boxas gives an empty boxad.
 */
BOXA *
boxaClipToBox(BOXA  *boxas,
              BOX   *box)
{
l_int32  idx, count;
BOX     *src, *overlap;
BOXA    *boxad;

    PROCNAME("boxaClipToBox");

    if (boxas == NULL)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (box == NULL)
        return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
    count = boxaGetCount(boxas);
    if (count == 0)
        return boxaCreate(1);  /* empty */

    boxad = boxaCreate(0);
    for (idx = 0; idx < count; idx++) {
        src = boxaGetBox(boxas, idx, L_CLONE);
        overlap = boxOverlapRegion(box, src);
        if (overlap != NULL)
            boxaAddBox(boxad, overlap, L_INSERT);
        boxDestroy(&src);
    }
    return boxad;
}
/*!
 *  boxaIntersectsBox()
 *
 *      Input:  boxas
 *              box (for intersecting)
 *      Return: boxad (boxa with all boxes in boxas that intersect box),
 *                     or null on error
 *
 *  Notes:
 *      (1) A copy of every box in boxas for which boxIntersects()
 *          reports an intersection with @box is placed in boxad.
 *      (2) An empty boxas gives an empty boxad.
 */
BOXA *
boxaIntersectsBox(BOXA  *boxas,
                  BOX   *box)
{
l_int32  idx, count, hit;
BOX     *candidate;
BOXA    *boxad;

    PROCNAME("boxaIntersectsBox");

    if (boxas == NULL)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (box == NULL)
        return (BOXA *)ERROR_PTR("box not defined", procName, NULL);
    count = boxaGetCount(boxas);
    if (count == 0)
        return boxaCreate(1);  /* empty */

    boxad = boxaCreate(0);
    for (idx = 0; idx < count; idx++) {
        candidate = boxaGetBox(boxas, idx, L_CLONE);
        boxIntersects(box, candidate, &hit);
        if (hit == 1)
            boxaAddBox(boxad, candidate, L_COPY);
        boxDestroy(&candidate);  /* release our clone */
    }
    return boxad;
}
/*!
 * \brief   boxaConvertToPta()
 *
 * \param[in]    boxa
 * \param[in]    ncorners     2 or 4 for the representation of each box
 * \return  pta with %ncorners points for each box in the boxa,
 *                   or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) If ncorners == 2, we select the UL and LR corners.
 *          Otherwise we save all 4 corners in this order: UL, UR, LL, LR.
 *      (2) The points for each box come from boxConvertToPta() and are
 *          appended to the output pta in box order.
 *      (3) Other boxa --> pta functions are:
 *          * boxaExtractAsPta(): allows extraction of any dimension
 *            and/or side location, with each in a separate pta.
 *          * boxaExtractCorners(): extracts any of the four corners as a pta.
 * </pre>
 */
PTA *
boxaConvertToPta(BOXA    *boxa,
                 l_int32  ncorners)
{
l_int32  idx, count;
BOX     *cur;
PTA     *ptad, *corners;

    PROCNAME("boxaConvertToPta");

    if (boxa == NULL)
        return (PTA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (!(ncorners == 2 || ncorners == 4))
        return (PTA *)ERROR_PTR("ncorners not 2 or 4", procName, NULL);

    count = boxaGetCount(boxa);
    ptad = ptaCreate(count);
    if (ptad == NULL)
        return (PTA *)ERROR_PTR("pta not made", procName, NULL);

    for (idx = 0; idx < count; idx++) {
        cur = boxaGetBox(boxa, idx, L_COPY);
        corners = boxConvertToPta(cur, ncorners);
        ptaJoin(ptad, corners, 0, -1);
        boxDestroy(&cur);
        ptaDestroy(&corners);
    }
    return ptad;
}
/*!
 *  boxaWriteStream()
 *
 *      Input:  stream
 *              boxa
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Writes a text header (version number and box count) followed
 *          by one line per box giving its index, x, y, w and h.
 *      (2) Returns an error if any box cannot be retrieved; lines already
 *          written remain in the stream.
 */
l_int32
boxaWriteStream(FILE  *fp,
                BOXA  *boxa)
{
l_int32  count, idx;
BOX     *cur;

    PROCNAME("boxaWriteStream");

    if (fp == NULL)
        return ERROR_INT("stream not defined", procName, 1);
    if (boxa == NULL)
        return ERROR_INT("boxa not defined", procName, 1);

    count = boxaGetCount(boxa);
    fprintf(fp, "\nBoxa Version %d\n", BOXA_VERSION_NUMBER);
    fprintf(fp, "Number of boxes = %d\n", count);
    for (idx = 0; idx < count; idx++) {
        cur = boxaGetBox(boxa, idx, L_CLONE);
        if (cur == NULL)
            return ERROR_INT("box not found", procName, 1);
        fprintf(fp, " Box[%d]: x = %d, y = %d, w = %d, h = %d\n",
                idx, cur->x, cur->y, cur->w, cur->h);
        boxDestroy(&cur);
    }
    return 0;
}
/*!
 *  boxaSplitEvenOdd()
 *
 *      Input:  boxa
 *              &boxae, &boxao (<return> save even and odd boxes in their
 *                 separate boxa, setting the other type to invalid boxes.)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Both output boxa have the same number of boxes as the input.
 *          boxae holds copies of the even-indexed boxes at their original
 *          positions, with zero-size placeholder boxes at the odd
 *          positions; boxao is the reverse.
 */
l_int32
boxaSplitEvenOdd(BOXA   *boxa,
                 BOXA  **pboxae,
                 BOXA  **pboxao)
{
l_int32  pos, count, iseven;
BOX     *real, *filler;

    PROCNAME("boxaSplitEvenOdd");

    if (pboxae == NULL || pboxao == NULL)
        return ERROR_INT("&boxae and &boxao not defined", procName, 1);
    *pboxae = *pboxao = NULL;
    if (boxa == NULL)
        return ERROR_INT("boxa not defined", procName, 1);

    count = boxaGetCount(boxa);
    *pboxae = boxaCreate(count);
    *pboxao = boxaCreate(count);
    for (pos = 0; pos < count; pos++) {
        real = boxaGetBox(boxa, pos, L_COPY);
        filler = boxCreate(0, 0, 0, 0);  /* empty placeholder */
        iseven = (pos % 2 == 0);
        boxaAddBox(*pboxae, iseven ? real : filler, L_INSERT);
        boxaAddBox(*pboxao, iseven ? filler : real, L_INSERT);
    }
    return 0;
}
/*!
 *  boxaGetBoxGeometry()
 *
 *      Input:  boxa
 *              index  (to the index-th box)
 *              &x, &y, &w, &h (<optional return>; each can be null)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Every non-null output is set to 0 before any checking, so
 *          the outputs are defined on error.
 */
l_int32
boxaGetBoxGeometry(BOXA     *boxa,
                   l_int32   index,
                   l_int32  *px,
                   l_int32  *py,
                   l_int32  *pw,
                   l_int32  *ph)
{
BOX  *target;

    PROCNAME("boxaGetBoxGeometry");

    if (px != NULL) *px = 0;
    if (py != NULL) *py = 0;
    if (pw != NULL) *pw = 0;
    if (ph != NULL) *ph = 0;
    if (boxa == NULL)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!(index >= 0 && index < boxa->n))
        return ERROR_INT("index not valid", procName, 1);

    target = boxaGetBox(boxa, index, L_CLONE);
    if (target == NULL)
        return ERROR_INT("box not found!", procName, 1);
    boxGetGeometry(target, px, py, pw, ph);
    boxDestroy(&target);
    return 0;
}
/*!
 *  boxaTransform()
 *
 *      Input:  boxa
 *              shiftx, shifty
 *              scalex, scaley
 *      Return: boxad, or null on error
 *
 *  Notes:
 *      (1) This is a very simple function that first shifts, then scales.
 *          Each box is transformed with boxTransform().
 *      (2) If a box cannot be retrieved, the partially built boxad is
 *          destroyed and null is returned.
 */
BOXA *
boxaTransform(BOXA      *boxas,
              l_int32    shiftx,
              l_int32    shifty,
              l_float32  scalex,
              l_float32  scaley)
{
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;

    PROCNAME("boxaTransform");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
            boxaDestroy(&boxad);  /* don't leak the partial result */
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        }
        boxd = boxTransform(boxs, shiftx, shifty, scalex, scaley);
        boxDestroy(&boxs);
        boxaAddBox(boxad, boxd, L_INSERT);
    }

    return boxad;
}
/*!
 *  boxaSortByIndex()
 *
 *      Input:  boxas
 *              naindex (na that maps from the new boxa to the input boxa)
 *      Return: boxad (sorted), or null on error
 *
 *  Notes:
 *      (1) Box i of boxad is a copy of box naindex[i] of boxas.
 *      (2) boxad has the same number of boxes as boxas.
 */
BOXA *
boxaSortByIndex(BOXA  *boxas,
                NUMA  *naindex)
{
l_int32  pos, count, srcindex;
BOXA    *boxad;

    PROCNAME("boxaSortByIndex");

    if (boxas == NULL)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (naindex == NULL)
        return (BOXA *)ERROR_PTR("naindex not defined", procName, NULL);

    count = boxaGetCount(boxas);
    boxad = boxaCreate(count);
    for (pos = 0; pos < count; pos++) {
        numaGetIValue(naindex, pos, &srcindex);
        boxaAddBox(boxad, boxaGetBox(boxas, srcindex, L_COPY), L_INSERT);
    }
    return boxad;
}
/*!
 *  boxaRotateOrth()
 *
 *      Input:  boxa
 *              w, h (of image in which the boxa is embedded)
 *              rotation (0 = noop, 1 = 90 deg, 2 = 180 deg, 3 = 270 deg;
 *                        all rotations are clockwise)
 *      Return: boxad, or null on error
 *
 *  Notes:
 *      (1) See boxRotateOrth() for details.
 *      (2) For rotation == 0 a copy of boxas is returned.
 *      (3) If a box cannot be retrieved, the partially built boxad is
 *          destroyed and null is returned.
 */
BOXA *
boxaRotateOrth(BOXA    *boxas,
               l_int32  w,
               l_int32  h,
               l_int32  rotation)
{
l_int32  i, n;
BOX     *boxs, *boxd;
BOXA    *boxad;

    PROCNAME("boxaRotateOrth");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (rotation == 0)
        return boxaCopy(boxas, L_COPY);
    if (rotation < 1 || rotation > 3)
        return (BOXA *)ERROR_PTR("rotation not in {0,1,2,3}", procName, NULL);

    n = boxaGetCount(boxas);
    if ((boxad = boxaCreate(n)) == NULL)
        return (BOXA *)ERROR_PTR("boxad not made", procName, NULL);
    for (i = 0; i < n; i++) {
        if ((boxs = boxaGetBox(boxas, i, L_CLONE)) == NULL) {
            boxaDestroy(&boxad);  /* don't leak the partial result */
            return (BOXA *)ERROR_PTR("boxs not found", procName, NULL);
        }
        boxd = boxRotateOrth(boxs, w, h, rotation);
        boxDestroy(&boxs);
        boxaAddBox(boxad, boxd, L_INSERT);
    }

    return boxad;
}
/*! * \brief pixQuadtreeMean() * * \param[in] pixs 8 bpp, no colormap * \param[in] nlevels in quadtree; max allowed depends on image size * \param[in] *pix_ma input mean accumulator; can be null * \param[out] *pfpixa mean values in quadtree * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) The returned fpixa has %nlevels of fpix, each containing * the mean values at its level. Level 0 has a * single value; level 1 has 4 values; level 2 has 16; etc. * </pre> */ l_int32 pixQuadtreeMean(PIX *pixs, l_int32 nlevels, PIX *pix_ma, FPIXA **pfpixa) { l_int32 i, j, w, h, size, n; l_float32 val; BOX *box; BOXA *boxa; BOXAA *baa; FPIX *fpix; PIX *pix_mac; PROCNAME("pixQuadtreeMean"); if (!pfpixa) return ERROR_INT("&fpixa not defined", procName, 1); *pfpixa = NULL; if (!pixs || pixGetDepth(pixs) != 8) return ERROR_INT("pixs not defined or not 8 bpp", procName, 1); pixGetDimensions(pixs, &w, &h, NULL); if (nlevels > quadtreeMaxLevels(w, h)) return ERROR_INT("nlevels too large for image", procName, 1); if (!pix_ma) pix_mac = pixBlockconvAccum(pixs); else pix_mac = pixClone(pix_ma); if (!pix_mac) return ERROR_INT("pix_mac not made", procName, 1); if ((baa = boxaaQuadtreeRegions(w, h, nlevels)) == NULL) { pixDestroy(&pix_mac); return ERROR_INT("baa not made", procName, 1); } *pfpixa = fpixaCreate(nlevels); for (i = 0; i < nlevels; i++) { boxa = boxaaGetBoxa(baa, i, L_CLONE); size = 1 << i; n = boxaGetCount(boxa); /* n == size * size */ fpix = fpixCreate(size, size); for (j = 0; j < n; j++) { box = boxaGetBox(boxa, j, L_CLONE); pixMeanInRectangle(pixs, box, pix_mac, &val); fpixSetPixel(fpix, j % size, j / size, val); boxDestroy(&box); } fpixaAddFPix(*pfpixa, fpix, L_INSERT); boxaDestroy(&boxa); } pixDestroy(&pix_mac); boxaaDestroy(&baa); return 0; }
// Runs OCR on the image file `filename` and stores the result in m_text.
//
// The image is loaded, converted to 8-bit grayscale and handed to the
// Tesseract API (m_tess). In the active (#if 1) branch the block-level
// components are recognized one at a time, and the text of a component is
// appended to m_text only when its mean confidence exceeds m_min_conf.
// The #else branch recognizes the whole page in one call.
//
// Returns early, leaving m_text untouched, if the image cannot be loaded
// or converted.
//
// Ownership: the buffers returned by GetUTF8Text() are released with
// delete[], and the Boxa and each box clone are destroyed here.
void tesser::ocrThread(QString filename)
{
    QImage image(filename);
    if (image.isNull())
        return;
    qDebug() << image;

    QImage g = image.convertToFormat(QImage::Format_Grayscale8);
    qDebug() << g;
    if (g.isNull())
        return;

    m_tess->SetImage(g.bits(), g.width(), g.height(), 1, g.bytesPerLine());
    m_tess->SetSourceResolution(300);
    m_tess->DetectOS(0);

    m_text.clear();

#if 1
    Boxa* boxes = m_tess->GetComponentImages(tesseract::RIL_BLOCK, true, NULL, NULL);
    if (boxes != NULL) {
        qDebug() << "Textline image components found: " << boxes->n;
        for (int i = 0; i < boxes->n; i++) {
            BOX* box = boxaGetBox(boxes, i, L_CLONE);
            if (box == NULL)
                continue;
            m_tess->SetRectangle(box->x, box->y, box->w, box->h);
            char *text = m_tess->GetUTF8Text();
            int conf = m_tess->MeanTextConf();
            if (text != NULL) {
                fprintf(stderr, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                        i, box->x, box->y, box->w, box->h, conf, text);
                if (conf > m_min_conf)
                    m_text.append(text);
                else
                    qDebug() << "Confidence under " << m_min_conf << " skipping";
                delete[] text;  // GetUTF8Text() result is owned by the caller
            }
            boxDestroy(&box);  // release the clone
        }
        boxaDestroy(&boxes);
    }
#else
    m_tess->Recognize(0);
    char *text = m_tess->GetUTF8Text();
    qDebug() << m_tess->MeanTextConf();
    if (text != NULL) {
        m_text.append(text);
        delete[] text;
    }
#endif

    qDebug() << m_text;
    m_tess->Clear();
}
int main(int argc, char **argv) { char *dirin, *dirout, *infile, *outfile, *tail; l_int32 i, nfiles, border, x, y, w, h, xb, yb, wb, hb; BOX *box1, *box2; BOXA *boxa1, *boxa2; PIX *pixs, *pixt1, *pixd; SARRAY *safiles; static char mainName[] = "croptext"; if (argc != 4) return ERROR_INT("Syntax: croptext dirin border dirout", mainName, 1); dirin = argv[1]; border = atoi(argv[2]); dirout = argv[3]; setLeptDebugOK(1); safiles = getSortedPathnamesInDirectory(dirin, NULL, 0, 0); nfiles = sarrayGetCount(safiles); for (i = 0; i < nfiles; i++) { infile = sarrayGetString(safiles, i, L_NOCOPY); splitPathAtDirectory(infile, NULL, &tail); outfile = genPathname(dirout, tail); pixs = pixRead(infile); pixt1 = pixMorphSequence(pixs, "r11 + c10.40 + o5.5 + x4", 0); boxa1 = pixConnComp(pixt1, NULL, 8); if (boxaGetCount(boxa1) == 0) { fprintf(stderr, "Warning: no components on page %s\n", tail); continue; } boxa2 = boxaSort(boxa1, L_SORT_BY_AREA, L_SORT_DECREASING, NULL); box1 = boxaGetBox(boxa2, 0, L_CLONE); boxGetGeometry(box1, &x, &y, &w, &h); xb = L_MAX(0, x - border); yb = L_MAX(0, y - border); wb = w + 2 * border; hb = h + 2 * border; box2 = boxCreate(xb, yb, wb, hb); pixd = pixClipRectangle(pixs, box2, NULL); pixWrite(outfile, pixd, IFF_TIFF_G4); pixDestroy(&pixs); pixDestroy(&pixt1); pixDestroy(&pixd); boxaDestroy(&boxa1); boxaDestroy(&boxa2); } return 0; }
// This method returns the computed mode-height of blobs in the pix. // It also prunes very small blobs from calculation. int ShiroRekhaSplitter::GetModeHeight(Pix* pix) { Boxa* boxa = pixConnComp(pix, NULL, 8); STATS heights(0, pixGetHeight(pix)); heights.clear(); for (int i = 0; i < boxaGetCount(boxa); ++i) { Box* box = boxaGetBox(boxa, i, L_CLONE); if (box->h >= 3 || box->w >= 3) { heights.add(box->h, 1); } boxDestroy(&box); } boxaDestroy(&boxa); return heights.mode(); }
/*
 * renderTransformedBoxa()
 *
 *     Input:  pixt (image to render into)
 *             boxa (boxes to render)
 *             i (index used to derive the color and the 4th-from-last
 *                argument, i % 4, passed to pixRenderHashBoxArb())
 *     Return: 0 always
 *
 * Renders every box in boxa onto pixt with pixRenderHashBoxArb(), using
 * an rgb color computed from i.
 */
l_int32
renderTransformedBoxa(PIX     *pixt,
                      BOXA    *boxa,
                      l_int32  i)
{
l_int32  k, count, red, green, blue;
BOX     *cur;

    red = (1413 * i) % 256;
    green = (4917 * i) % 256;
    blue = (7341 * i) % 256;

    count = boxaGetCount(boxa);
    for (k = 0; k < count; k++) {
        cur = boxaGetBox(boxa, k, L_CLONE);
        pixRenderHashBoxArb(pixt, cur, 10, 3, i % 4, 1, red, green, blue);
        boxDestroy(&cur);
    }
    return 0;
}
/*!
 *  pixaCreateFromBoxa()
 *
 *      Input:  pixs
 *              boxa
 *              &cropwarn (<optional return> TRUE if the boxa extent
 *                         is larger than pixs)
 *      Return: pixad, or null on error
 *
 *  Notes:
 *      (1) This extracts from pixs the region corresponding to each
 *          box in the boxa.
 *      (2) The 3rd arg is optional.  If the extent of the boxa exceeds the
 *          size of pixs, so that some boxes are either clipped or
 *          entirely outside the pix, TRUE is returned in it.
 *      (3) In that case only the regions that could be clipped are
 *          added to pixad, each paired with its clipped box.  Otherwise
 *          each region is paired with a copy of the input box.
 */
PIXA *
pixaCreateFromBoxa(PIX      *pixs,
                   BOXA     *boxa,
                   l_int32  *pcropwarn)
{
l_int32  idx, count, w, h, wext, hext, cropwarn;
BOX     *src, *clipped;
PIX     *piece;
PIXA    *pixad;

    PROCNAME("pixaCreateFromBoxa");

    if (pixs == NULL)
        return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (boxa == NULL)
        return (PIXA *)ERROR_PTR("boxa not defined", procName, NULL);

    count = boxaGetCount(boxa);
    pixad = pixaCreate(count);
    if (pixad == NULL)
        return (PIXA *)ERROR_PTR("pixad not made", procName, NULL);

    boxaGetExtent(boxa, &wext, &hext, NULL);
    pixGetDimensions(pixs, &w, &h, NULL);
    cropwarn = (wext > w || hext > h) ? TRUE : FALSE;
    if (pcropwarn != NULL)
        *pcropwarn = cropwarn;

    for (idx = 0; idx < count; idx++) {
        src = boxaGetBox(boxa, idx, L_COPY);
        if (!cropwarn) {
                /* Every box is inside pixs; keep the box as given */
            piece = pixClipRectangle(pixs, src, NULL);
            pixaAddPix(pixad, piece, L_INSERT);
            pixaAddBox(pixad, src, L_INSERT);
            continue;
        }

            /* piece is NULL if the box lies outside pixs */
        piece = pixClipRectangle(pixs, src, &clipped);
        if (piece != NULL) {
            pixaAddPix(pixad, piece, L_INSERT);
            pixaAddBox(pixad, clipped, L_INSERT);
        }
        boxDestroy(&src);
    }
    return pixad;
}
/*!
 *  boxaaFlattenToBoxa()
 *
 *      Input:  boxaa
 *              &naindex  (<optional return> the boxa index in the boxaa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxa, or null on error
 *
 *  Notes:
 *      (1) This 'flattens' the boxaa to a boxa, taking the boxes in
 *          order in the first boxa, then the second, etc.
 *      (2) If a boxa is empty, we generate an invalid, placeholder box
 *          of zero size.  This is useful when converting from a boxaa
 *          where each boxa has either 0 or 1 boxes, and it is necessary
 *          to maintain a 1:1 correspondence between the initial
 *          boxa array and the resulting box array.
 *      (3) If &naindex is defined, we generate a Numa that gives, for
 *          each box in the output boxa (placeholders included), the
 *          index of the boxa to which it belongs.
 */
BOXA *
boxaaFlattenToBoxa(BOXAA   *baa,
                   NUMA   **pnaindex,
                   l_int32  copyflag)
{
l_int32  row, col, nrows, ncols;
BOXA    *flat, *rowboxa;
BOX     *item;
NUMA    *naindex = NULL;

    PROCNAME("boxaaFlattenToBoxa");

    if (pnaindex != NULL) *pnaindex = NULL;
    if (baa == NULL)
        return (BOXA *)ERROR_PTR("baa not defined", procName, NULL);
    if (!(copyflag == L_COPY || copyflag == L_CLONE))
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);
    if (pnaindex != NULL) {
        naindex = numaCreate(0);
        *pnaindex = naindex;
    }

    nrows = boxaaGetCount(baa);
    flat = boxaCreate(nrows);
    for (row = 0; row < nrows; row++) {
        rowboxa = boxaaGetBoxa(baa, row, L_CLONE);
        ncols = boxaGetCount(rowboxa);
        if (ncols != 0) {
            for (col = 0; col < ncols; col++) {
                item = boxaGetBox(rowboxa, col, copyflag);
                boxaAddBox(flat, item, L_INSERT);
                if (pnaindex != NULL)
                    numaAddNumber(naindex, row);  /* save 'row' number */
            }
        } else {  /* placeholder box */
            item = boxCreate(0, 0, 0, 0);
            boxaAddBox(flat, item, L_INSERT);
            if (pnaindex != NULL)
                numaAddNumber(naindex, row);  /* save 'row' number */
        }
        boxaDestroy(&rowboxa);
    }
    return flat;
}
/* static */ void BoxChar::RotateBoxes(float rotation, int xcenter, int ycenter, int start_box, int end_box, vector<BoxChar*>* boxes) { Boxa* orig = boxaCreate(0); for (int i = start_box; i < end_box; ++i) { BOX* box = (*boxes)[i]->box_; if (box) boxaAddBox(orig, box, L_CLONE); } Boxa* rotated = boxaRotate(orig, xcenter, ycenter, rotation); boxaDestroy(&orig); for (int i = start_box, box_ind = 0; i < end_box; ++i) { if ((*boxes)[i]->box_) { boxDestroy(&((*boxes)[i]->box_)); (*boxes)[i]->box_ = boxaGetBox(rotated, box_ind++, L_CLONE); } } boxaDestroy(&rotated); }
/*!
 *  boxaaGetBox()
 *
 *      Input:  baa
 *              iboxa  (index into the boxa array in the boxaa)
 *              ibox  (index into the box array in the boxa)
 *              accessflag   (L_COPY or L_CLONE)
 *      Return: box, or null on error
 *
 *  Notes:
 *      (1) The boxa is accessed through a temporary clone, which is
 *          destroyed before returning.
 */
BOX *
boxaaGetBox(BOXAA   *baa,
            l_int32  iboxa,
            l_int32  ibox,
            l_int32  accessflag)
{
BOX   *found;
BOXA  *row;

    PROCNAME("boxaaGetBox");

    row = boxaaGetBoxa(baa, iboxa, L_CLONE);
    if (row == NULL)
        return (BOX *)ERROR_PTR("boxa not retrieved", procName, NULL);

    found = boxaGetBox(row, ibox, accessflag);
    if (found == NULL)
        L_ERROR("box not retrieved", procName);
    boxaDestroy(&row);
    return found;
}
// Helper erases false-positive line segments from the input/output line_pix. // 1. Since thick lines shouldn't really break up, we can eliminate some false // positives by marking segments that are at least kMinThickLineWidth // thickness, yet have a length less than min_thick_length. // 2. Lines that don't have at least 2 intersections with other lines and have // a lot of neighbouring non-lines are probably not lines (perhaps arabic // or Hindi words, or underlines.) // Bad line components are erased from line_pix. // Returns the number of remaining connected components. static int FilterFalsePositives(int resolution, Pix* nonline_pix, Pix* intersection_pix, Pix* line_pix) { int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); Pixa* pixa = NULL; Boxa* boxa = pixConnComp(line_pix, &pixa, 8); // Iterate over the boxes to remove false positives. int nboxes = boxaGetCount(boxa); int remaining_boxes = nboxes; for (int i = 0; i < nboxes; ++i) { Box* box = boxaGetBox(boxa, i, L_CLONE); l_int32 x, y, box_width, box_height; boxGetGeometry(box, &x, &y, &box_width, &box_height); Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); int max_width = MaxStrokeWidth(comp_pix); pixDestroy(&comp_pix); bool bad_line = false; // If the length is too short to stand-alone as a line, and the box width // is thick enough, and the stroke width is thick enough it is bad. if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && box_width < min_thick_length && box_height < min_thick_length && max_width > kMinThickLineWidth) { // Too thick for the length. bad_line = true; } if (!bad_line && (intersection_pix == NULL || NumTouchingIntersections(box, intersection_pix) < 2)) { // Test non-line density near the line. int nonline_count = CountPixelsAdjacentToLine(max_width, box, nonline_pix); if (nonline_count > box_height * box_width * kMaxNonLineDensity) bad_line = true; } if (bad_line) { // Not a good line. 
pixClearInRect(line_pix, box); --remaining_boxes; } boxDestroy(&box); } pixaDestroy(&pixa); boxaDestroy(&boxa); return remaining_boxes; }
/*!
 * \brief   boxaExtractSortedPattern()
 *
 * \param[in]    boxa    typ. of word bounding boxes, in textline order
 * \param[in]    na      index of textline for each box in boxa
 * \return  naa NUMAA, where each numa represents one textline,
 *                   or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The input is expected to come from pixGetWordBoxesInTextlines().
 *      (2) Each numa in the output consists of the vertical center
 *          (y + h / 2) of the first box in the textline, followed by
 *          pairs of x coordinates representing the left and right
 *          edges of each of the boxes in the textline.
 *      (3) A new textline is started whenever the row index in %na
 *          exceeds the previous maximum (initially -1).  Boxes seen
 *          before any textline has been started (i.e., with a row
 *          index below 0) are ignored.
 *      (4) An empty %boxa gives an empty naa.
 * </pre>
 */
NUMAA *
boxaExtractSortedPattern(BOXA  *boxa,
                         NUMA  *na)
{
l_int32  index, nbox, row, prevrow, x, y, w, h;
BOX     *box;
NUMA    *nad;
NUMAA   *naa;

    PROCNAME("boxaExtractSortedPattern");

    if (!boxa)
        return (NUMAA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (!na)
        return (NUMAA *)ERROR_PTR("na not defined", procName, NULL);

    naa = numaaCreate(0);
    nbox = boxaGetCount(boxa);
    if (nbox == 0)
        return naa;

    nad = NULL;  /* no textline in progress yet */
    prevrow = -1;
    for (index = 0; index < nbox; index++) {
        box = boxaGetBox(boxa, index, L_CLONE);
        numaGetIValue(na, index, &row);
        if (row > prevrow) {
                /* Flush the finished textline before starting a new one */
            if (nad)
                numaaAddNuma(naa, nad, L_INSERT);
            nad = numaCreate(0);
            prevrow = row;
            boxGetGeometry(box, NULL, &y, NULL, &h);
            numaAddNumber(nad, y + h / 2);
        }
        if (nad) {
            boxGetGeometry(box, &x, NULL, &w, NULL);
            numaAddNumber(nad, x);
            numaAddNumber(nad, x + w - 1);
        }
        boxDestroy(&box);
    }
    if (nad)
        numaaAddNuma(naa, nad, L_INSERT);

    return naa;
}
/*!
 *  boxaGetValidBox()
 *
 *      Input:  boxa
 *              index  (to the index-th box)
 *              accessflag  (L_COPY or L_CLONE)
 *      Return: box, or null if box is not valid or on error
 *
 *  Notes:
 *      (1) This returns NULL for an invalid box in a boxa.
 *          For a box to be valid, both the width and height must be > 0.
 *      (2) We allow invalid boxes, with w = 0 or h = 0, as placeholders
 *          in boxa for which the index of the box in the boxa is important.
 *          This is an atypical situation; usually you want to put only
 *          valid boxes in a boxa.
 *      (3) An invalid box is destroyed here before returning.
 */
BOX *
boxaGetValidBox(BOXA    *boxa,
                l_int32  index,
                l_int32  accessflag)
{
l_int32  width, height;
BOX     *candidate;

    PROCNAME("boxaGetValidBox");

    if (boxa == NULL)
        return (BOX *)ERROR_PTR("boxa not defined", procName, NULL);

    candidate = boxaGetBox(boxa, index, accessflag);
    if (candidate == NULL)
        return (BOX *)ERROR_PTR("box not returned", procName, NULL);

    boxGetGeometry(candidate, NULL, NULL, &width, &height);
    if (width > 0 && height > 0)
        return candidate;

        /* Not valid, but not necessarily an error */
    boxDestroy(&candidate);
    return candidate;
}
/*!
 *  boxaSort2dByIndex()
 *
 *      Input:  boxas
 *              naa (numaa that maps from the new baa to the input boxa)
 *      Return: baa (sorted boxaa), or null on error
 *
 *  Notes:
 *      (1) The output has one boxa for each numa in naa.  Box j of
 *          boxa i is a copy of the box in boxas whose index is the
 *          j-th number in numa i.
 *      (2) The total count of numbers in naa must equal the number of
 *          boxes in boxas.
 */
BOXAA *
boxaSort2dByIndex(BOXA   *boxas,
                  NUMAA  *naa)
{
l_int32  ntot, boxtot, i, j, n, nn, index;
BOX     *box;
BOXA    *boxa;
BOXAA   *baa;
NUMA    *na;

    PROCNAME("boxaSort2dByIndex");

    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naa)
        return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);

        /* Check counts */
    ntot = numaaGetNumberCount(naa);
    boxtot = boxaGetCount(boxas);
    if (ntot != boxtot)
        return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);

    n = numaaGetCount(naa);
    baa = boxaaCreate(n);
    for (i = 0; i < n; i++) {
        na = numaaGetNuma(naa, i, L_CLONE);
        nn = numaGetCount(na);
        boxa = boxaCreate(nn);
        for (j = 0; j < nn; j++) {
                /* The position within this numa is j, not the row i */
            numaGetIValue(na, j, &index);
            box = boxaGetBox(boxas, index, L_COPY);
            boxaAddBox(boxa, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaDestroy(&na);
    }

    return baa;
}
/*!
 * \brief   boxaSelectRange()
 *
 * \param[in]    boxas
 * \param[in]    first      use 0 to select from the beginning
 * \param[in]    last       use -1 to select to the end
 * \param[in]    copyflag   L_COPY, L_CLONE
 * \return  boxad, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The copyflag specifies what we do with each box from boxas.
 *          Specifically, L_CLONE inserts a clone into boxad of each
 *          selected box from boxas.
 *      (2) A negative %first is treated as 0, any negative %last as the
 *          final index, and a %last beyond the end is clipped with a
 *          warning.
 *      (3) If boxas is empty, a warning is issued and
 *          boxaCopy(boxas, copyflag) is returned.
 * </pre>
 */
BOXA *
boxaSelectRange(BOXA    *boxas,
                l_int32  first,
                l_int32  last,
                l_int32  copyflag)
{
l_int32  count, nselected, idx;
BOX     *picked;
BOXA    *boxad;

    PROCNAME("boxaSelectRange");

    if (boxas == NULL)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!(copyflag == L_COPY || copyflag == L_CLONE))
        return (BOXA *)ERROR_PTR("invalid copyflag", procName, NULL);

    count = boxaGetCount(boxas);
    if (count == 0) {
        L_WARNING("boxas is empty\n", procName);
        return boxaCopy(boxas, copyflag);
    }

        /* Normalize the requested range to [0, count - 1] */
    if (first < 0)
        first = 0;
    if (last < 0)
        last = count - 1;
    if (first >= count)
        return (BOXA *)ERROR_PTR("invalid first", procName, NULL);
    if (last >= count) {
        L_WARNING("last = %d is beyond max index = %d; adjusting\n",
                  procName, last, count - 1);
        last = count - 1;
    }
    if (first > last)
        return (BOXA *)ERROR_PTR("first > last", procName, NULL);

    nselected = last - first + 1;
    boxad = boxaCreate(nselected);
    for (idx = first; idx <= last; idx++) {
        picked = boxaGetBox(boxas, idx, copyflag);
        boxaAddBox(boxad, picked, L_INSERT);
    }
    return boxad;
}