/*!
 *  pixaReplacePix()
 *
 *      Input:  pixa
 *              index  (to the index-th pix)
 *              pix (insert to replace existing one)
 *              box (<optional> insert to replace existing)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) In-place replacement of one pix.
 *      (2) The previous pix at that location is destroyed.
 *      (3) If @box is given, it replaces an existing box, so @index
 *          must also be a valid index into the boxa of the pixa
 *          (0 <= index < boxa->n).  When that check fails, @box is
 *          not inserted and an error is returned.
 *      (4) The pix is replaced before the box is handled.  An error
 *          return caused by the box therefore means that the pix
 *          replacement has already taken place.
 */
l_int32
pixaReplacePix(PIXA    *pixa,
               l_int32  index,
               PIX     *pix,
               BOX     *box)
{
BOXA  *boxa;

    PROCNAME("pixaReplacePix");

    if (!pixa)
        return ERROR_INT("pixa not defined", procName, 1);
    if (index < 0 || index >= pixa->n)
        return ERROR_INT("index not valid", procName, 1);
    if (!pix)
        return ERROR_INT("pix not defined", procName, 1);

    pixDestroy(&(pixa->pix[index]));
    pixa->pix[index] = pix;

    if (box) {
        boxa = pixa->boxa;
            /* index == boxa->n is one past the last box, so there
             * is no existing box to replace at that position. */
        if (index >= boxa->n)
            return ERROR_INT("boxa index not valid", procName, 1);
            /* Report a failed replacement instead of claiming success */
        if (boxaReplaceBox(boxa, index, box))
            return ERROR_INT("box not replaced", procName, 1);
    }

    return 0;
}
/*! * pixaAddBorderGeneral() * * Input: pixad (can be null or equal to pixas) * pixas (containing pix of all depths; colormap ok) * left, right, top, bot (number of pixels added) * val (value of added border pixels) * Return: pixad (with border added to each pix), including on error * * Notes: * (1) For binary images: * white: val = 0 * black: val = 1 * For grayscale images: * white: val = 2 ** d - 1 * black: val = 0 * For rgb color images: * white: val = 0xffffff00 * black: val = 0 * For colormapped images, use 'index' found this way: * white: pixcmapGetRankIntensity(cmap, 1.0, &index); * black: pixcmapGetRankIntensity(cmap, 0.0, &index); * (2) For in-place replacement of each pix with a bordered version, * use @pixad = @pixas. To make a new pixa, use @pixad = NULL. * (3) In both cases, the boxa has sides adjusted as if it were * expanded by the border. */ PIXA * pixaAddBorderGeneral(PIXA *pixad, PIXA *pixas, l_int32 left, l_int32 right, l_int32 top, l_int32 bot, l_uint32 val) { l_int32 i, n, nbox; BOX *box; BOXA *boxad; PIX *pixs, *pixd; PROCNAME("pixaAddBorderGeneral"); if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, pixad); if (left < 0 || right < 0 || top < 0 || bot < 0) return (PIXA *)ERROR_PTR("negative border added!", procName, pixad); if (pixad && (pixad != pixas)) return (PIXA *)ERROR_PTR("pixad defined but != pixas", procName, pixad); n = pixaGetCount(pixas); if (!pixad) pixad = pixaCreate(n); for (i = 0; i < n; i++) { pixs = pixaGetPix(pixas, i, L_CLONE); pixd = pixAddBorderGeneral(pixs, left, right, top, bot, val); if (pixad == pixas) /* replace */ pixaReplacePix(pixad, i, pixd, NULL); else pixaAddPix(pixad, pixd, L_INSERT); pixDestroy(&pixs); } nbox = pixaGetBoxaCount(pixas); boxad = pixaGetBoxa(pixad, L_CLONE); for (i = 0; i < nbox; i++) { if ((box = pixaGetBox(pixas, i, L_COPY)) == NULL) { L_WARNING_INT("box %d not found", procName, i); break; } boxAdjustSides(box, box, -left, right, -top, bot); if (pixad == pixas) /* replace */ 
boxaReplaceBox(boxad, i, box); else boxaAddBox(boxad, box, L_INSERT); } boxaDestroy(&boxad); return pixad; }
// Get a set of bounding boxes of possible horizontal lines in the image. // The input resolution overrides any resolution set in src_pix. // The output line_pix contains just all the detected lines. // The output boxes undergo the transformation (x,y)->(height-y,x) so the // lines can be found with a vertical line finder afterwards. // This transformation allows a simple x/y flip to reverse it in tesseract // coordinates and it is faster to flip the lines than rotate the image. Boxa* LineFinder::GetHLineBoxes(int resolution, Pix* src_pix, Pix** line_pix) { #ifdef HAVE_LIBLEPT // Remove any parts of 1 inch/kThinLineFraction high or more, by opening // away the thin lines and subtracting what's left. // This is very generous and will leave in even quite wide lines. Pix* pixt1 = pixOpenBrick(NULL, src_pix, 1, resolution / kThinLineFraction); pixSubtract(pixt1, src_pix, pixt1); // Spread vertically to allow for some skew. Pix* pixt2 = pixDilateBrick(NULL, pixt1, 1, 3); // Now keep only wide stuff of width at least 1 inch/kMinLineLengthFraction. pixOpenBrick(pixt1, pixt2, resolution / kMinLineLengthFraction, 1); pixDestroy(&pixt2); // Put a single pixel crack in every line at an arbitrary spacing, // so they break up and the bounding boxes can be used to get the // direction accurately enough without needing outlines. int wpl = pixGetWpl(pixt1); int width = pixGetWidth(pixt1); int height = pixGetHeight(pixt1); l_uint32* data = pixGetData(pixt1); for (int y = 0; y < height; ++y, data += wpl) { for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { CLEAR_DATA_BIT(data, x); } } if (textord_tabfind_show_vlines) pixWrite("hlines.png", pixt1, IFF_PNG); Boxa* boxa = pixConnComp(pixt1, NULL, 8); *line_pix = pixt1; // Iterate the boxes to flip x and y. 
int nboxes = boxaGetCount(boxa); for (int i = 0; i < nboxes; ++i) { l_int32 x, y, box_width, box_height; boxaGetBoxGeometry(boxa, i, &x, &y, &box_width, &box_height); Box* box = boxCreate(height - (y + box_height), width - (x + box_width), box_height, box_width); boxaReplaceBox(boxa, i, box); } return boxa; #else return NULL; #endif }
/*!
 *  boxaInitFull()
 *
 *      Input:  boxa (typically empty)
 *              box (to be replicated into the entire ptr array)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This initializes a boxa by filling up the entire box ptr array
 *          with copies of @box.  Any existing boxes are destroyed.
 *          After this operation, the number of boxes is equal to
 *          the number of allocated ptrs.
 *      (2) Note that we use boxaReplaceBox() instead of boxaInsertBox().
 *          They both have the same effect when inserting into a NULL ptr
 *          in the boxa ptr array.
 *      (3) Example usage.  This function is useful to prepare for a
 *          random insertion (or replacement) of boxes into a boxa.
 *          To randomly insert boxes into a boxa, up to some index "max":
 *             Boxa *boxa = boxaCreate(max);
 *             Box *box = boxCreate(...);
 *             boxaInitFull(boxa, box);
 *          If we have an existing boxa with a smaller ptr array, it can
 *          be reused:
 *             boxaExtendArrayToSize(boxa, max);
 *             Box *box = boxCreate(...);
 *             boxaInitFull(boxa, box);
 *          The initialization allows the boxa to always be properly
 *          filled, even if all the boxes are not later replaced.
 *          If you want to know which boxes have been replaced, you can
 *          initialize the array with invalid boxes that have
 *          w = 0 and/or h = 0.  Then boxaGetValidBox() will return
 *          NULL for the invalid boxes.
 */
l_int32
boxaInitFull(BOXA  *boxa,
             BOX   *box)
{
l_int32  slot, nslots;

    PROCNAME("boxaInitFull");

    if (!boxa)
        return ERROR_INT("boxa not defined", procName, 1);
    if (!box)
        return ERROR_INT("box not defined", procName, 1);

        /* Declare every allocated slot as in use, then fill each
         * one with its own copy of the template box. */
    nslots = boxa->nalloc;
    boxa->n = nslots;
    for (slot = 0; slot < nslots; slot++)
        boxaReplaceBox(boxa, slot, boxCopy(box));

    return 0;
}
/*!
 *  boxaCombineOverlaps()
 *
 *      Input:  boxas
 *      Return: boxad (where each set of boxes in boxas that overlap are
 *                     combined into a single bounding box in boxad), or
 *                     null on error.
 *
 *  Notes:
 *      (1) If there are no overlapping boxes, it simply returns a copy
 *          of @boxas.
 *      (2) The alternative method of painting each rectangle and finding
 *          the 4-connected components gives the wrong result, because
 *          two non-overlapping rectangles, when rendered, can still
 *          be 4-connected, and hence they will be joined.
 *      (3) A bad case is to have n boxes, none of which overlap.
 *          Then you have one iteration with O(n^2) compares.  This
 *          is still faster than painting each rectangle and finding
 *          the connected components, even for thousands of rectangles.
 */
BOXA *
boxaCombineOverlaps(BOXA  *boxas)
{
l_int32  i, j, nprev, ncur, overlap, merged;
BOX     *cand, *kept, *united;
BOXA    *boxa_in, *boxa_out;

    PROCNAME("boxaCombineOverlaps");

    if (!boxas)
        return (BOXA *)ERROR_PTR("boxas not defined", procName, NULL);

    boxa_in = boxaCopy(boxas, L_COPY);
    nprev = boxaGetCount(boxa_in);

        /* Each pass rebuilds the set of boxes; stop as soon as a pass
         * leaves the number of boxes unchanged. */
    for (;;) {
        boxa_out = boxaCreate(nprev);
        for (i = 0; i < nprev; i++) {
            cand = boxaGetBox(boxa_in, i, L_COPY);

                /* The first box has nothing to be compared with.  Every
                 * later box is tested against the boxes already placed
                 * in boxa_out: at the first intersection, that box is
                 * replaced by the union of the two and the search stops.
                 * A box that intersects nothing is appended. */
            ncur = (i == 0) ? 0 : boxaGetCount(boxa_out);
            merged = FALSE;
            for (j = 0; j < ncur && merged == FALSE; j++) {
                kept = boxaGetBox(boxa_out, j, L_CLONE);
                boxIntersects(cand, kept, &overlap);
                if (overlap == 1) {
                    united = boxBoundingRegion(cand, kept);
                    boxaReplaceBox(boxa_out, j, united);
                    boxDestroy(&cand);
                    merged = TRUE;
                }
                boxDestroy(&kept);
            }
            if (merged == FALSE)
                boxaAddBox(boxa_out, cand, L_INSERT);
        }

        ncur = boxaGetCount(boxa_out);
        if (ncur == nprev)  /* we're done */
            break;

            /* Something was merged: use the result as the next input */
        nprev = ncur;
        boxaDestroy(&boxa_in);
        boxa_in = boxa_out;
    }

    boxaDestroy(&boxa_in);
    return boxa_out;
}