/*! * pixGetRegionsBinary() * * Input: pixs (1 bpp, assumed to be 300 to 400 ppi) * &pixhm (<optional return> halftone mask) * &pixtm (<optional return> textline mask) * &pixtb (<optional return> textblock mask) * debug (flag: set to 1 for debug output) * Return: 0 if OK, 1 on error * * Notes: * (1) It is best to deskew the image before segmenting. * (2) The debug flag enables a number of outputs. These * are included to show how to generate and save/display * these results. */ l_int32 pixGetRegionsBinary(PIX *pixs, PIX **ppixhm, PIX **ppixtm, PIX **ppixtb, l_int32 debug) { char *tempname; l_int32 htfound, tlfound; PIX *pixr, *pixt1, *pixt2; PIX *pixtext; /* text pixels only */ PIX *pixhm2; /* halftone mask; 2x reduction */ PIX *pixhm; /* halftone mask; */ PIX *pixtm2; /* textline mask; 2x reduction */ PIX *pixtm; /* textline mask */ PIX *pixvws; /* vertical white space mask */ PIX *pixtb2; /* textblock mask; 2x reduction */ PIX *pixtbf2; /* textblock mask; 2x reduction; small comps filtered */ PIX *pixtb; /* textblock mask */ PROCNAME("pixGetRegionsBinary"); if (ppixhm) *ppixhm = NULL; if (ppixtm) *ppixtm = NULL; if (ppixtb) *ppixtb = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); /* 2x reduce, to 150 -200 ppi */ pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); pixDisplayWrite(pixr, debug); /* Get the halftone mask */ pixhm2 = pixGenHalftoneMask(pixr, &pixtext, &htfound, debug); /* Get the textline mask from the text pixels */ pixtm2 = pixGenTextlineMask(pixtext, &pixvws, &tlfound, debug); /* Get the textblock mask from the textline mask */ pixtb2 = pixGenTextblockMask(pixtm2, pixvws, debug); pixDestroy(&pixr); pixDestroy(&pixtext); pixDestroy(&pixvws); /* Remove small components from the mask, where a small * component is defined as one with both width and height < 60 */ pixtbf2 = pixSelectBySize(pixtb2, 60, 60, 4, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL); pixDestroy(&pixtb2); pixDisplayWriteFormat(pixtbf2, debug, IFF_PNG); /* Expand all masks to full resolution, and do filling or * small dilations for better coverage. */ pixhm = pixExpandReplicate(pixhm2, 2); pixt1 = pixSeedfillBinary(NULL, pixhm, pixs, 8); pixOr(pixhm, pixhm, pixt1); pixDestroy(&pixt1); pixDisplayWriteFormat(pixhm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtm2, 2); pixtm = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtm, debug, IFF_PNG); pixt1 = pixExpandReplicate(pixtbf2, 2); pixtb = pixDilateBrick(NULL, pixt1, 3, 3); pixDestroy(&pixt1); pixDisplayWriteFormat(pixtb, debug, IFF_PNG); pixDestroy(&pixhm2); pixDestroy(&pixtm2); pixDestroy(&pixtbf2); /* Debug: identify objects that are neither text nor halftone image */ if (debug) { pixt1 = pixSubtract(NULL, pixs, pixtm); /* remove text pixels */ pixt2 = pixSubtract(NULL, pixt1, pixhm); /* remove halftone pixels */ pixDisplayWriteFormat(pixt2, 1, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); } /* Debug: display textline components with random colors */ if (debug) { l_int32 w, h; BOXA *boxa; PIXA *pixa; boxa = pixConnComp(pixtm, &pixa, 8); pixGetDimensions(pixtm, &w, &h, NULL); pixt1 = pixaDisplayRandomCmap(pixa, w, h); pixcmapResetColor(pixGetColormap(pixt1), 0, 255, 255, 255); pixDisplay(pixt1, 100, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixaDestroy(&pixa); boxaDestroy(&boxa); pixDestroy(&pixt1); } /* Debug: identify the outlines of each textblock */ if (debug) { PIXCMAP *cmap; PTAA *ptaa; ptaa = pixGetOuterBordersPtaa(pixtb); tempname = genTempFilename("/tmp", "tb_outlines.ptaa", 0); ptaaWrite(tempname, ptaa, 1); FREE(tempname); pixt1 = pixRenderRandomCmapPtaa(pixtb, ptaa, 1, 16, 1); cmap = pixGetColormap(pixt1); pixcmapResetColor(cmap, 0, 130, 130, 130); pixDisplay(pixt1, 500, 100); pixDisplayWriteFormat(pixt1, 1, IFF_PNG); pixDestroy(&pixt1); ptaaDestroy(&ptaa); } /* Debug: get b.b. for all mask components */ if (debug) { BOXA *bahm, *batm, *batb; bahm = pixConnComp(pixhm, NULL, 4); batm = pixConnComp(pixtm, NULL, 4); batb = pixConnComp(pixtb, NULL, 4); tempname = genTempFilename("/tmp", "htmask.boxa", 0); boxaWrite(tempname, bahm); FREE(tempname); tempname = genTempFilename("/tmp", "textmask.boxa", 0); boxaWrite(tempname, batm); FREE(tempname); tempname = genTempFilename("/tmp", "textblock.boxa", 0); boxaWrite(tempname, batb); FREE(tempname); boxaDestroy(&bahm); boxaDestroy(&batm); boxaDestroy(&batb); } if (ppixhm) *ppixhm = pixhm; else pixDestroy(&pixhm); if (ppixtm) *ppixtm = pixtm; else pixDestroy(&pixtm); if (ppixtb) *ppixtb = pixtb; else pixDestroy(&pixtb); return 0; }
/*! * pixMorphCompSequence() * * Input: pixs * sequence (string specifying sequence) * dispsep (controls debug display of each result in the sequence: * 0: no output * > 0: gives horizontal separation in pixels between * successive displays * < 0: pdf output; abs(dispsep) is used for naming) * Return: pixd, or null on error * * Notes: * (1) This does rasterop morphology on binary images, using composite * operations for extra speed on large Sels. * (2) Safe closing is used atomically. However, if you implement a * closing as a sequence with a dilation followed by an * erosion, it will not be safe, and to ensure that you have * no boundary effects you must add a border in advance and * remove it at the end. * (3) For other usage details, see the notes for pixMorphSequence(). * (4) The sequence string is formatted as follows: * - An arbitrary number of operations, each separated * by a '+' character. White space is ignored. * - Each operation begins with a case-independent character * specifying the operation: * d or D (dilation) * e or E (erosion) * o or O (opening) * c or C (closing) * r or R (rank binary reduction) * x or X (replicative binary expansion) * b or B (add a border of 0 pixels of this size) * - The args to the morphological operations are bricks of hits, * and are formatted as a.b, where a and b are horizontal and * vertical dimensions, rsp. * - The args to the reduction are a sequence of up to 4 integers, * each from 1 to 4. * - The arg to the expansion is a power of two, in the set * {2, 4, 8, 16}. */ PIX * pixMorphCompSequence(PIX *pixs, const char *sequence, l_int32 dispsep) { char *rawop, *op, *fname; char buf[256]; l_int32 nops, i, j, nred, fact, w, h, x, y, border, pdfout; l_int32 level[4]; PIX *pixt1, *pixt2; PIXA *pixa; SARRAY *sa; PROCNAME("pixMorphCompSequence"); if (!pixs) return (PIX *)ERROR_PTR("pixs not defined", procName, NULL); if (!sequence) return (PIX *)ERROR_PTR("sequence not defined", procName, NULL); /* Split sequence into individual operations */ sa = sarrayCreate(0); sarraySplitString(sa, sequence, "+"); nops = sarrayGetCount(sa); pdfout = (dispsep < 0) ? 1 : 0; if (!morphSequenceVerify(sa)) { sarrayDestroy(&sa); return (PIX *)ERROR_PTR("sequence not valid", procName, NULL); } /* Parse and operate */ pixa = NULL; if (pdfout) { pixa = pixaCreate(0); pixaAddPix(pixa, pixs, L_CLONE); snprintf(buf, sizeof(buf), "/tmp/seq_output_%d.pdf", L_ABS(dispsep)); fname = genPathname(buf, NULL); } border = 0; pixt1 = pixCopy(NULL, pixs); pixt2 = NULL; x = y = 0; for (i = 0; i < nops; i++) { rawop = sarrayGetString(sa, i, 0); op = stringRemoveChars(rawop, " \n\t"); switch (op[0]) { case 'd': case 'D': sscanf(&op[1], "%d.%d", &w, &h); pixt2 = pixDilateCompBrick(NULL, pixt1, w, h); pixSwapAndDestroy(&pixt1, &pixt2); break; case 'e': case 'E': sscanf(&op[1], "%d.%d", &w, &h); pixt2 = pixErodeCompBrick(NULL, pixt1, w, h); pixSwapAndDestroy(&pixt1, &pixt2); break; case 'o': case 'O': sscanf(&op[1], "%d.%d", &w, &h); pixOpenCompBrick(pixt1, pixt1, w, h); break; case 'c': case 'C': sscanf(&op[1], "%d.%d", &w, &h); pixCloseSafeCompBrick(pixt1, pixt1, w, h); break; case 'r': case 'R': nred = strlen(op) - 1; for (j = 0; j < nred; j++) level[j] = op[j + 1] - '0'; for (j = nred; j < 4; j++) level[j] = 0; pixt2 = pixReduceRankBinaryCascade(pixt1, level[0], level[1], level[2], level[3]); pixSwapAndDestroy(&pixt1, &pixt2); break; case 'x': case 'X': sscanf(&op[1], "%d", &fact); pixt2 = pixExpandReplicate(pixt1, fact); pixSwapAndDestroy(&pixt1, &pixt2); break; case 'b': case 'B': sscanf(&op[1], "%d", &border); pixt2 = pixAddBorder(pixt1, border, 0); pixSwapAndDestroy(&pixt1, &pixt2); break; default: /* All invalid ops are caught in the first pass */ break; } FREE(op); /* Debug output */ if (dispsep > 0) { pixDisplay(pixt1, x, y); x += dispsep; } if (pdfout) pixaAddPix(pixa, pixt1, L_COPY); } if (border > 0) { pixt2 = pixRemoveBorder(pixt1, border); pixSwapAndDestroy(&pixt1, &pixt2); } if (pdfout) { pixaConvertToPdf(pixa, 0, 1.0, L_FLATE_ENCODE, 0, fname, fname); FREE(fname); pixaDestroy(&pixa); } sarrayDestroy(&sa); return pixt1; }
/*! * \brief pixUpDownDetectGeneralDwa() * * \param[in] pixs 1 bpp, deskewed, English text * \param[out] pconf confidence that text is rightside-up * \param[in] mincount min number of up + down; use 0 for default * \param[in] npixels number of pixels removed from each side of word box * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) See the notes in pixUpDownDetectGeneral() for usage. * </pre> */ l_int32 pixUpDownDetectGeneralDwa(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 npixels, l_int32 debug) { char flipsel1[] = "flipsel1"; char flipsel2[] = "flipsel2"; char flipsel3[] = "flipsel3"; char flipsel4[] = "flipsel4"; l_int32 countup, countdown, nmax; l_float32 nup, ndown; PIX *pixt, *pix0, *pix1, *pix2, *pix3, *pixm; PROCNAME("pixUpDownDetectGeneralDwa"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_UP_DOWN_COUNT; if (npixels < 0) npixels = 0; lept_mkdir("lept/orient"); /* One of many reasonable pre-filtering sequences: (1, 8) and (30, 1). * This closes holes in x-height characters and joins them at * the x-height. There is more noise in the descender detection * from this, but it works fairly well. */ pixt = pixMorphSequenceDwa(pixs, "c1.8 + c30.1", 0); /* Be sure to add the border before the flip DWA operations! */ pix0 = pixAddBorderGeneral(pixt, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, 0); pixDestroy(&pixt); /* Optionally, make a mask of the word bounding boxes, shortening * each of them by a fixed amount at each end. */ pixm = NULL; if (npixels > 0) { l_int32 i, nbox, x, y, w, h; BOX *box; BOXA *boxa; pix1 = pixMorphSequenceDwa(pix0, "o10.1", 0); boxa = pixConnComp(pix1, NULL, 8); pixm = pixCreateTemplate(pix1); pixDestroy(&pix1); nbox = boxaGetCount(boxa); for (i = 0; i < nbox; i++) { box = boxaGetBox(boxa, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w > 2 * npixels) pixRasterop(pixm, x + npixels, y - 6, w - 2 * npixels, h + 13, PIX_SET, NULL, 0, 0); boxDestroy(&box); } boxaDestroy(&boxa); } /* Find the ascenders and optionally filter with pixm. * For an explanation of the procedure used for counting the result * of the HMT, see comments in pixUpDownDetectGeneral(). */ pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1); pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2); pixOr(pix1, pix1, pix2); if (pixm) pixAnd(pix1, pix1, pixm); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &countup, NULL); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* Find the ascenders and optionally filter with pixm. */ pix1 = pixFlipFHMTGen(NULL, pix0, flipsel3); pix2 = pixFlipFHMTGen(NULL, pix0, flipsel4); pixOr(pix1, pix1, pix2); if (pixm) pixAnd(pix1, pix1, pixm); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &countdown, NULL); pixDestroy(&pix1); pixDestroy(&pix2); pixDestroy(&pix3); /* Evaluate statistically, generating a confidence that is * related to the probability with a gaussian distribution. */ nup = (l_float32)(countup); ndown = (l_float32)(countdown); nmax = L_MAX(countup, countdown); if (nmax > mincount) *pconf = 2. * ((nup - ndown) / sqrt(nup + ndown)); if (debug) { if (pixm) pixWriteDebug("/tmp/lept/orient/pixm2.png", pixm, IFF_PNG); fprintf(stderr, "nup = %7.3f, ndown = %7.3f, conf = %7.3f\n", nup, ndown, *pconf); if (*pconf > DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is rightside-up\n"); if (*pconf < -DEFAULT_MIN_UP_DOWN_CONF) fprintf(stderr, "Text is upside-down\n"); } pixDestroy(&pix0); pixDestroy(&pixm); return 0; }
/*! * \brief pixMirrorDetect() * * \param[in] pixs 1 bpp, deskewed, English text * \param[out] pconf confidence that text is not LR mirror reversed * \param[in] mincount min number of left + right; use 0 for default * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) For this test, it is necessary that the text is horizontally * oriented, with ascenders going up. * (2) conf is the normalized difference between the number of * right and left facing characters with ascenders. * Left-facing are {d}; right-facing are {b, h, k}. * At least that was the expectation. In practice, we can * really just say that it is the normalized difference in * hits using two specific hit-miss filters, textsel1 and textsel2, * after the image has been suitably pre-filtered so that * these filters are effective. See (4) for what's really happening. * (3) A large positive conf value indicates normal text, whereas * a large negative conf value means the page is mirror reversed. * (4) The implementation is a bit tricky. The general idea is * to fill the x-height part of characters, but not the space * between them, before doing the HMT. This is done by * finding pixels added using two different operations -- a * horizontal close and a vertical dilation -- and adding * the intersection of these sets to the original. It turns * out that the original intuition about the signal was largely * in error: much of the signal for right-facing characters * comes from the lower part of common x-height characters, like * the e and c, that remain open after these operations. * So it's important that the operations to close the x-height * parts of the characters are purposely weakened sufficiently * to allow these characters to remain open. The wonders * of morphology! * </pre> */ l_int32 pixMirrorDetect(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug) { l_int32 count1, count2, nmax; l_float32 nleft, nright; PIX *pix0, *pix1, *pix2, *pix3; SEL *sel1, *sel2; PROCNAME("pixMirrorDetect"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; sel1 = selCreateFromString(textsel1, 5, 6, NULL); sel2 = selCreateFromString(textsel2, 5, 6, NULL); /* Fill x-height characters but not space between them, sort of. */ pix3 = pixMorphCompSequence(pixs, "d1.30", 0); pixXor(pix3, pix3, pixs); pix0 = pixMorphCompSequence(pixs, "c15.1", 0); pixXor(pix0, pix0, pixs); pixAnd(pix0, pix0, pix3); pixOr(pix0, pix0, pixs); pixDestroy(&pix3); /* Filter the right-facing characters. */ pix1 = pixHMT(NULL, pix0, sel1); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &count1, NULL); pixDebugFlipDetect("/tmp/lept/orient/right.png", pixs, pix1, debug); pixDestroy(&pix1); pixDestroy(&pix3); /* Filter the left-facing characters. */ pix2 = pixHMT(NULL, pix0, sel2); pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0); pixCountPixels(pix3, &count2, NULL); pixDebugFlipDetect("/tmp/lept/orient/left.png", pixs, pix2, debug); pixDestroy(&pix2); pixDestroy(&pix3); nright = (l_float32)count1; nleft = (l_float32)count2; nmax = L_MAX(count1, count2); pixDestroy(&pix0); selDestroy(&sel1); selDestroy(&sel2); if (nmax > mincount) *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft)); if (debug) { fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is not mirror reversed\n"); if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is mirror reversed\n"); } return 0; }
/*! * \brief pixMirrorDetectDwa() * * \param[in] pixs 1 bpp, deskewed, English text * \param[out] pconf confidence that text is not LR mirror reversed * \param[in] mincount min number of left + right; use 0 for default * \param[in] debug 1 for debug output; 0 otherwise * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) We assume the text is horizontally oriented, with * ascenders going up. * (2) See notes in pixMirrorDetect(). * </pre> */ l_int32 pixMirrorDetectDwa(PIX *pixs, l_float32 *pconf, l_int32 mincount, l_int32 debug) { char flipsel1[] = "flipsel1"; char flipsel2[] = "flipsel2"; l_int32 count1, count2, nmax; l_float32 nleft, nright; PIX *pix0, *pix1, *pix2, *pix3; PROCNAME("pixMirrorDetectDwa"); if (!pconf) return ERROR_INT("&conf not defined", procName, 1); *pconf = 0.0; if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (mincount == 0) mincount = DEFAULT_MIN_MIRROR_FLIP_COUNT; /* Fill x-height characters but not space between them, sort of. */ pix3 = pixMorphSequenceDwa(pixs, "d1.30", 0); pixXor(pix3, pix3, pixs); pix0 = pixMorphSequenceDwa(pixs, "c15.1", 0); pixXor(pix0, pix0, pixs); pixAnd(pix0, pix0, pix3); pixOr(pix3, pix0, pixs); pixDestroy(&pix0); pix0 = pixAddBorderGeneral(pix3, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, ADDED_BORDER, 0); pixDestroy(&pix3); /* Filter the right-facing characters. */ pix1 = pixFlipFHMTGen(NULL, pix0, flipsel1); pix3 = pixReduceRankBinaryCascade(pix1, 1, 1, 0, 0); pixCountPixels(pix3, &count1, NULL); pixDestroy(&pix1); pixDestroy(&pix3); /* Filter the left-facing characters. */ pix2 = pixFlipFHMTGen(NULL, pix0, flipsel2); pix3 = pixReduceRankBinaryCascade(pix2, 1, 1, 0, 0); pixCountPixels(pix3, &count2, NULL); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix0); nright = (l_float32)count1; nleft = (l_float32)count2; nmax = L_MAX(count1, count2); if (nmax > mincount) *pconf = 2. * ((nright - nleft) / sqrt(nright + nleft)); if (debug) { fprintf(stderr, "nright = %f, nleft = %f\n", nright, nleft); if (*pconf > DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is not mirror reversed\n"); if (*pconf < -DEFAULT_MIN_MIRROR_FLIP_CONF) fprintf(stderr, "Text is mirror reversed\n"); } return 0; }
/*! * pixGetWordsInTextlines() * * Input: pixs (1 bpp, typ. 300 ppi) * reduction (1 for input res; 2 for 2x reduction of input res) * minwidth, minheight (of saved components; smaller are discarded) * maxwidth, maxheight (of saved components; larger are discarded) * &boxad (<return> word boxes sorted in textline line order) * &pixad (<return> word images sorted in textline line order) * &naindex (<return> index of textline for each word) * Return: 0 if OK, 1 on error * * Notes: * (1) The input should be at a resolution of about 300 ppi. * The word masks and word images can be computed at either * 150 ppi or 300 ppi. For the former, set reduction = 2. * (2) The four size constraints on saved components are all * scaled by @reduction. * (3) The result are word images (and their b.b.), extracted in * textline order, at either full res or 2x reduction, * and with a numa giving the textline index for each word. * (4) The pixa and boxa interfaces should make this type of * application simple to put together. The steps are: * - optionally reduce by 2x * - generate first estimate of word masks * - get b.b. of these, and remove the small and big ones * - extract pixa of the word images, using the b.b. * - sort actual word images in textline order (2d) * - flatten them to a pixa (1d), saving the textline index * for each pix * (5) In an actual application, it may be desirable to pre-filter * the input image to remove large components, to extract * single columns of text, and to deskew them. For example, * to remove both large components and small noisy components * that can interfere with the statistics used to estimate * parameters for segmenting by words, but still retain text lines, * the following image preprocessing can be done: * Pix *pixt = pixMorphSequence(pixs, "c40.1", 0); * Pix *pixf = pixSelectBySize(pixt, 0, 60, 8, * L_SELECT_HEIGHT, L_SELECT_IF_LT, NULL); * pixAnd(pixf, pixf, pixs); // the filtered image * The closing turns text lines into long blobs, but does not * significantly increase their height. But if there are many * small connected components in a dense texture, this is likely * to generate tall components that will be eliminated in pixf. */ l_int32 pixGetWordsInTextlines(PIX *pixs, l_int32 reduction, l_int32 minwidth, l_int32 minheight, l_int32 maxwidth, l_int32 maxheight, BOXA **pboxad, PIXA **ppixad, NUMA **pnai) { l_int32 maxdil; BOXA *boxa1, *boxad; BOXAA *baa; NUMA *nai; NUMAA *naa; PIXA *pixa1, *pixad; PIX *pix1; PIXAA *paa; PROCNAME("pixGetWordsInTextlines"); if (!pboxad || !ppixad || !pnai) return ERROR_INT("&boxad, &pixad, &nai not all defined", procName, 1); *pboxad = NULL; *ppixad = NULL; *pnai = NULL; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (reduction != 1 && reduction != 2) return ERROR_INT("reduction not in {1,2}", procName, 1); if (reduction == 1) { pix1 = pixClone(pixs); maxdil = 18; } else { /* reduction == 2 */ pix1 = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); maxdil = 9; } /* Get the bounding boxes of the words from the word mask. */ pixWordBoxesByDilation(pix1, maxdil, minwidth, minheight, maxwidth, maxheight, &boxa1, NULL); /* Generate a pixa of the word images */ pixa1 = pixaCreateFromBoxa(pix1, boxa1, NULL); /* mask over each word */ /* Sort the bounding boxes of these words by line. We use the * index mapping to allow identical sorting of the pixa. */ baa = boxaSort2d(boxa1, &naa, -1, -1, 4); paa = pixaSort2dByIndex(pixa1, naa, L_CLONE); /* Flatten the word paa */ pixad = pixaaFlattenToPixa(paa, &nai, L_CLONE); boxad = pixaGetBoxa(pixad, L_COPY); *pnai = nai; *pboxad = boxad; *ppixad = pixad; pixDestroy(&pix1); pixaDestroy(&pixa1); boxaDestroy(&boxa1); boxaaDestroy(&baa); pixaaDestroy(&paa); numaaDestroy(&naa); return 0; }
l_int32 DoPageSegmentation(PIX *pixs, /* should be at least 300 ppi */ l_int32 which) /* 1, 2, 3, 4 */ { char buf[256]; l_int32 zero; BOXA *boxatm, *boxahm; PIX *pixr; /* image reduced to 150 ppi */ PIX *pixhs; /* image of halftone seed, 150 ppi */ PIX *pixm; /* image of mask of components, 150 ppi */ PIX *pixhm1; /* image of halftone mask, 150 ppi */ PIX *pixhm2; /* image of halftone mask, 300 ppi */ PIX *pixht; /* image of halftone components, 150 ppi */ PIX *pixnht; /* image without halftone components, 150 ppi */ PIX *pixi; /* inverted image, 150 ppi */ PIX *pixvws; /* image of vertical whitespace, 150 ppi */ PIX *pixtm1; /* image of closed textlines, 150 ppi */ PIX *pixtm2; /* image of refined text line mask, 150 ppi */ PIX *pixtm3; /* image of refined text line mask, 300 ppi */ PIX *pixtb1; /* image of text block mask, 150 ppi */ PIX *pixtb2; /* image of text block mask, 300 ppi */ PIX *pixnon; /* image of non-text or halftone, 150 ppi */ PIX *pixt1, *pixt2, *pixt3; PIXA *pixa; PIXCMAP *cmap; PTAA *ptaa; l_int32 ht_flag = 0; l_int32 ws_flag = 0; l_int32 text_flag = 0; l_int32 block_flag = 0; PROCNAME("DoPageSegmentation"); if (which == 1) ht_flag = 1; else if (which == 2) ws_flag = 1; else if (which == 3) text_flag = 1; else if (which == 4) block_flag = 1; else return ERROR_INT("invalid parameter: not in [1...4]", procName, 1); pixDisplayWrite(NULL, -1); /* Reduce to 150 ppi */ pixt1 = pixScaleToGray2(pixs); pixDisplayWriteFormat(pixt1, L_MAX(ws_flag, L_MAX(ht_flag, block_flag)), IFF_PNG); if (which == 1) pixWrite("/tmp/lept/orig.gray.150.png", pixt1, IFF_PNG); pixDestroy(&pixt1); pixr = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); /* Get seed for halftone parts */ pixt1 = pixReduceRankBinaryCascade(pixr, 4, 4, 3, 0); pixt2 = pixOpenBrick(NULL, pixt1, 5, 5); pixhs = pixExpandBinaryPower2(pixt2, 8); pixDisplayWriteFormat(pixhs, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/htseed.150.png", pixhs, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); /* Get mask for connected regions */ pixm = pixCloseSafeBrick(NULL, pixr, 4, 4); pixDisplayWriteFormat(pixm, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/ccmask.150.png", pixm, IFF_PNG); /* Fill seed into mask to get halftone mask */ pixhm1 = pixSeedfillBinary(NULL, pixhs, pixm, 4); pixDisplayWriteFormat(pixhm1, ht_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/htmask.150.png", pixhm1, IFF_PNG); pixhm2 = pixExpandBinaryPower2(pixhm1, 2); /* Extract halftone stuff */ pixht = pixAnd(NULL, pixhm1, pixr); if (which == 1) pixWrite("/tmp/lept/ht.150.png", pixht, IFF_PNG); /* Extract non-halftone stuff */ pixnht = pixXor(NULL, pixht, pixr); pixDisplayWriteFormat(pixnht, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/text.150.png", pixnht, IFF_PNG); pixZero(pixht, &zero); if (zero) fprintf(stderr, "No halftone parts found\n"); else fprintf(stderr, "Halftone parts found\n"); /* Get bit-inverted image */ pixi = pixInvert(NULL, pixnht); if (which == 1) pixWrite("/tmp/lept/invert.150.png", pixi, IFF_PNG); pixDisplayWriteFormat(pixi, ws_flag, IFF_PNG); /* The whitespace mask will break textlines where there * is a large amount of white space below or above. * We can prevent this by identifying regions of the * inverted image that have large horizontal (bigger than * the separation between columns) and significant * vertical extent (bigger than the separation between * textlines), and subtracting this from the whitespace mask. */ pixt1 = pixMorphCompSequence(pixi, "o80.60", 0); pixt2 = pixSubtract(NULL, pixi, pixt1); pixDisplayWriteFormat(pixt2, ws_flag, IFF_PNG); pixDestroy(&pixt1); /* Identify vertical whitespace by opening inverted image */ pixt3 = pixOpenBrick(NULL, pixt2, 5, 1); /* removes thin vertical lines */ pixvws = pixOpenBrick(NULL, pixt3, 1, 200); /* gets long vertical lines */ pixDisplayWriteFormat(pixvws, L_MAX(text_flag, ws_flag), IFF_PNG); if (which == 1) pixWrite("/tmp/lept/vertws.150.png", pixvws, IFF_PNG); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Get proto (early processed) text line mask. */ /* First close the characters and words in the textlines */ pixtm1 = pixCloseSafeBrick(NULL, pixnht, 30, 1); pixDisplayWriteFormat(pixtm1, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/textmask1.150.png", pixtm1, IFF_PNG); /* Next open back up the vertical whitespace corridors */ pixtm2 = pixSubtract(NULL, pixtm1, pixvws); if (which == 1) pixWrite("/tmp/lept/textmask2.150.png", pixtm2, IFF_PNG); /* Do a small opening to remove noise */ pixOpenBrick(pixtm2, pixtm2, 3, 3); pixDisplayWriteFormat(pixtm2, text_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/textmask3.150.png", pixtm2, IFF_PNG); pixtm3 = pixExpandBinaryPower2(pixtm2, 2); /* Join pixels vertically to make text block mask */ pixtb1 = pixMorphSequence(pixtm2, "c1.10 + o4.1", 0); pixDisplayWriteFormat(pixtb1, block_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/textblock1.150.png", pixtb1, IFF_PNG); /* Solidify the textblock mask and remove noise: * (1) For each c.c., close the blocks and dilate slightly * to form a solid mask. * (2) Small horizontal closing between components * (3) Open the white space between columns, again * (4) Remove small components */ pixt1 = pixMorphSequenceByComponent(pixtb1, "c30.30 + d3.3", 8, 0, 0, NULL); pixCloseSafeBrick(pixt1, pixt1, 10, 1); pixDisplayWriteFormat(pixt1, block_flag, IFF_PNG); pixt2 = pixSubtract(NULL, pixt1, pixvws); pixt3 = pixSelectBySize(pixt2, 25, 5, 8, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); pixDisplayWriteFormat(pixt3, block_flag, IFF_PNG); if (which == 1) pixWrite("/tmp/lept/textblock2.150.png", pixt3, IFF_PNG); pixtb2 = pixExpandBinaryPower2(pixt3, 2); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); /* Identify the outlines of each textblock */ ptaa = pixGetOuterBordersPtaa(pixtb2); pixt1 = pixRenderRandomCmapPtaa(pixtb2, ptaa, 1, 8, 1); cmap = pixGetColormap(pixt1); pixcmapResetColor(cmap, 0, 130, 130, 130); /* set interior to gray */ if (which == 1) pixWrite("/tmp/lept/textblock3.300.png", pixt1, IFF_PNG); pixDisplayWithTitle(pixt1, 480, 360, "textblock mask with outlines", DFLAG); ptaaDestroy(&ptaa); pixDestroy(&pixt1); /* Fill line mask (as seed) into the original */ pixt1 = pixSeedfillBinary(NULL, pixtm3, pixs, 8); pixOr(pixtm3, pixtm3, pixt1); pixDestroy(&pixt1); if (which == 1) pixWrite("/tmp/lept/textmask.300.png", pixtm3, IFF_PNG); pixDisplayWithTitle(pixtm3, 480, 360, "textline mask 4", DFLAG); /* Fill halftone mask (as seed) into the original */ pixt1 = pixSeedfillBinary(NULL, pixhm2, pixs, 8); pixOr(pixhm2, pixhm2, pixt1); pixDestroy(&pixt1); if (which == 1) pixWrite("/tmp/lept/htmask.300.png", pixhm2, IFF_PNG); pixDisplayWithTitle(pixhm2, 520, 390, "halftonemask 2", DFLAG); /* Find objects that are neither text nor halftones */ pixt1 = pixSubtract(NULL, pixs, pixtm3); /* remove text pixels */ pixnon = pixSubtract(NULL, pixt1, pixhm2); /* remove halftone pixels */ if (which == 1) pixWrite("/tmp/lept/other.300.png", pixnon, IFF_PNG); pixDisplayWithTitle(pixnon, 540, 420, "other stuff", DFLAG); pixDestroy(&pixt1); /* Write out b.b. for text line mask and halftone mask components */ boxatm = pixConnComp(pixtm3, NULL, 4); boxahm = pixConnComp(pixhm2, NULL, 8); if (which == 1) boxaWrite("/tmp/lept/textmask.boxa", boxatm); if (which == 1) boxaWrite("/tmp/lept/htmask.boxa", boxahm); pixa = pixaReadFiles("/tmp/lept/display", "file"); pixt1 = pixaDisplayTiledAndScaled(pixa, 8, 250, 4, 0, 25, 2); snprintf(buf, sizeof(buf), "/tmp/lept/segout.%d.png", which); pixWrite(buf, pixt1, IFF_PNG); pixDestroy(&pixt1); pixaDestroy(&pixa); /* clean up to test with valgrind */ pixDestroy(&pixr); pixDestroy(&pixhs); pixDestroy(&pixm); pixDestroy(&pixhm1); pixDestroy(&pixhm2); pixDestroy(&pixht); pixDestroy(&pixnht); pixDestroy(&pixi); pixDestroy(&pixvws); pixDestroy(&pixtm1); pixDestroy(&pixtm2); pixDestroy(&pixtm3); pixDestroy(&pixtb1); pixDestroy(&pixtb2); pixDestroy(&pixnon); boxaDestroy(&boxatm); boxaDestroy(&boxahm); return 0; }
/*! * \brief pixFindSkewSweepAndSearchScorePivot() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew; in degrees * \param[out] pconf confidence given by ratio of max/min score * \param[out] pendscore [optional] max score; use NULL to ignore * \param[in] redsweep sweep reduction factor = 1, 2, 4 or 8 * \param[in] redsearch binary search reduction factor = 1, 2, 4 or 8; * and must not exceed redsweep * \param[in] sweepcenter angle about which sweep is performed; in degrees * \param[in] sweeprange half the full range, taken about sweepcenter; * in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \param[in] minbsdelta min binary search increment angle; in degrees * \param[in] pivot L_SHEAR_ABOUT_CORNER, L_SHEAR_ABOUT_CENTER * \return 0 if OK, 1 on error or if angle measurment not valid * * <pre> * Notes: * (1) See notes in pixFindSkewSweepAndSearchScore(). * (2) This allows choice of shear pivoting from either the UL corner * or the center. For small angles, the ability to discriminate * angles is better with shearing from the UL corner. However, * for large angles (say, greater than 20 degrees), it is better * to shear about the center because a shear from the UL corner * loses too much of the image. * </pre> */ l_int32 pixFindSkewSweepAndSearchScorePivot(PIX *pixs, l_float32 *pangle, l_float32 *pconf, l_float32 *pendscore, l_int32 redsweep, l_int32 redsearch, l_float32 sweepcenter, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_int32 pivot) { l_int32 ret, bzero, i, nangles, n, ratio, maxindex, minloc; l_int32 width, height; l_float32 deg2rad, theta, delta; l_float32 sum, maxscore, maxangle; l_float32 centerangle, leftcenterangle, rightcenterangle; l_float32 lefttemp, righttemp; l_float32 bsearchscore[5]; l_float32 minscore, minthresh; l_float32 rangeleft; NUMA *natheta, *nascore; PIX *pixsw, *pixsch, *pixt1, *pixt2; PROCNAME("pixFindSkewSweepAndSearchScorePivot"); if (pendscore) *pendscore = 0.0; if (pangle) *pangle = 0.0; if (pconf) *pconf = 0.0; if (!pangle || !pconf) return ERROR_INT("&angle and/or &conf not defined", procName, 1); if (!pixs || pixGetDepth(pixs) != 1) return ERROR_INT("pixs not defined or not 1 bpp", procName, 1); if (redsweep != 1 && redsweep != 2 && redsweep != 4 && redsweep != 8) return ERROR_INT("redsweep must be in {1,2,4,8}", procName, 1); if (redsearch != 1 && redsearch != 2 && redsearch != 4 && redsearch != 8) return ERROR_INT("redsearch must be in {1,2,4,8}", procName, 1); if (redsearch > redsweep) return ERROR_INT("redsearch must not exceed redsweep", procName, 1); if (pivot != L_SHEAR_ABOUT_CORNER && pivot != L_SHEAR_ABOUT_CENTER) return ERROR_INT("invalid pivot", procName, 1); deg2rad = 3.1415926535 / 180.; ret = 0; /* Generate reduced image for binary search, if requested */ if (redsearch == 1) pixsch = pixClone(pixs); else if (redsearch == 2) pixsch = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); else if (redsearch == 4) pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); else /* redsearch == 8 */ pixsch = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); pixZero(pixsch, &bzero); if (bzero) { pixDestroy(&pixsch); return 1; } /* Generate reduced image for sweep, if requested */ ratio = redsweep / redsearch; if (ratio == 1) { pixsw = pixClone(pixsch); } else { /* ratio > 1 */ if (ratio == 2) pixsw = pixReduceRankBinaryCascade(pixsch, 1, 0, 0, 0); else if (ratio == 4) pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 0, 0); else /* ratio == 8 */ pixsw = pixReduceRankBinaryCascade(pixsch, 1, 2, 2, 0); } pixt1 = pixCreateTemplate(pixsw); if (ratio == 1) pixt2 = pixClone(pixt1); else pixt2 = pixCreateTemplate(pixsch); nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); natheta = numaCreate(nangles); nascore = numaCreate(nangles); if (!pixsch || !pixsw) { ret = ERROR_INT("pixsch and pixsw not both made", procName, 1); goto cleanup; } if (!pixt1 || !pixt2) { ret = ERROR_INT("pixt1 and pixt2 not both made", procName, 1); goto cleanup; } if (!natheta || !nascore) { ret = ERROR_INT("natheta and nascore not both made", procName, 1); goto cleanup; } /* Do sweep */ rangeleft = sweepcenter - sweeprange; for (i = 0; i < nangles; i++) { theta = rangeleft + i * sweepdelta; /* degrees */ /* Shear pix and put the result in pixt1 */ if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); else pixVShearCenter(pixt1, pixsw, deg2rad * theta, L_BRING_IN_WHITE); /* Get score */ pixFindDifferentialSquareSum(pixt1, &sum); #if DEBUG_PRINT_SCORES L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); #endif /* DEBUG_PRINT_SCORES */ /* Save the result in the output arrays */ numaAddNumber(nascore, sum); numaAddNumber(natheta, theta); } /* Find the largest of the set (maxscore at maxangle) */ numaGetMax(nascore, &maxscore, &maxindex); numaGetFValue(natheta, maxindex, &maxangle); #if DEBUG_PRINT_SWEEP L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, maxangle, maxscore); #endif /* DEBUG_PRINT_SWEEP */ #if DEBUG_PLOT_SCORES /* Plot the sweep result -- the scores versus rotation angle -- * using gnuplot with GPLOT_LINES (lines connecting data points). */ {GPLOT *gplot; gplot = gplotCreate("sweep_output", GPLOT_PNG, "Sweep. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ /* Check if the max is at the end of the sweep. */ n = numaGetCount(natheta); if (maxindex == 0 || maxindex == n - 1) { L_WARNING("max found at sweep edge\n", procName); goto cleanup; } /* Empty the numas for re-use */ numaEmpty(nascore); numaEmpty(natheta); /* Do binary search to find skew angle. * First, set up initial three points. */ centerangle = maxangle; if (pivot == L_SHEAR_ABOUT_CORNER) { pixVShearCorner(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); pixVShearCorner(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); } else { pixVShearCenter(pixt2, pixsch, deg2rad * centerangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[2]); pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle - sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[0]); pixVShearCenter(pixt2, pixsch, deg2rad * (centerangle + sweepdelta), L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[4]); } numaAddNumber(nascore, bsearchscore[2]); numaAddNumber(natheta, centerangle); numaAddNumber(nascore, bsearchscore[0]); numaAddNumber(natheta, centerangle - sweepdelta); numaAddNumber(nascore, bsearchscore[4]); numaAddNumber(natheta, centerangle + sweepdelta); /* Start the search */ delta = 0.5 * sweepdelta; while (delta >= minbsdelta) { /* Get the left intermediate score */ leftcenterangle = centerangle - delta; if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt2, pixsch, deg2rad * leftcenterangle, L_BRING_IN_WHITE); else pixVShearCenter(pixt2, pixsch, deg2rad * leftcenterangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[1]); numaAddNumber(nascore, bsearchscore[1]); numaAddNumber(natheta, leftcenterangle); /* Get the right intermediate score */ rightcenterangle = centerangle + delta; if (pivot == L_SHEAR_ABOUT_CORNER) pixVShearCorner(pixt2, pixsch, deg2rad * rightcenterangle, L_BRING_IN_WHITE); else pixVShearCenter(pixt2, pixsch, deg2rad * rightcenterangle, L_BRING_IN_WHITE); pixFindDifferentialSquareSum(pixt2, &bsearchscore[3]); numaAddNumber(nascore, bsearchscore[3]); numaAddNumber(natheta, rightcenterangle); /* Find the maximum of the five scores and its location. * Note that the maximum must be in the center * three values, not in the end two. */ maxscore = bsearchscore[1]; maxindex = 1; for (i = 2; i < 4; i++) { if (bsearchscore[i] > maxscore) { maxscore = bsearchscore[i]; maxindex = i; } } /* Set up score array to interpolate for the next iteration */ lefttemp = bsearchscore[maxindex - 1]; righttemp = bsearchscore[maxindex + 1]; bsearchscore[2] = maxscore; bsearchscore[0] = lefttemp; bsearchscore[4] = righttemp; /* Get new center angle and delta for next iteration */ centerangle = centerangle + delta * (maxindex - 2); delta = 0.5 * delta; } *pangle = centerangle; #if DEBUG_PRINT_SCORES L_INFO(" Binary search score = %7.3f\n", procName, bsearchscore[2]); #endif /* DEBUG_PRINT_SCORES */ if (pendscore) /* save if requested */ *pendscore = bsearchscore[2]; /* Return the ratio of Max score over Min score * as a confidence value. Don't trust if the Min score * is too small, which can happen if the image is all black * with only a few white pixels interspersed. In that case, * we get a contribution from the top and bottom edges when * vertically sheared, but this contribution becomes zero when * the shear angle is zero. For zero shear angle, the only * contribution will be from the white pixels. We expect that * the signal goes as the product of the (height * width^2), * so we compute a (hopefully) normalized minimum threshold as * a function of these dimensions. */ numaGetMin(nascore, &minscore, &minloc); width = pixGetWidth(pixsch); height = pixGetHeight(pixsch); minthresh = MINSCORE_THRESHOLD_CONSTANT * width * width * height; #if DEBUG_THRESHOLD L_INFO(" minthresh = %10.2f, minscore = %10.2f\n", procName, minthresh, minscore); L_INFO(" maxscore = %10.2f\n", procName, maxscore); #endif /* DEBUG_THRESHOLD */ if (minscore > minthresh) *pconf = maxscore / minscore; else *pconf = 0.0; /* Don't trust it if too close to the edge of the sweep * range or if maxscore is small */ if ((centerangle > rangeleft + 2 * sweeprange - sweepdelta) || (centerangle < rangeleft + sweepdelta) || (maxscore < MIN_VALID_MAXSCORE)) *pconf = 0.0; #if DEBUG_PRINT_BINARY fprintf(stderr, "Binary search: angle = %7.3f, score ratio = %6.2f\n", *pangle, *pconf); fprintf(stderr, " max score = %8.0f\n", maxscore); #endif /* DEBUG_PRINT_BINARY */ #if DEBUG_PLOT_SCORES /* Plot the result -- the scores versus rotation angle -- * using gnuplot with GPLOT_POINTS. Because the data * points are not ordered by theta (increasing or decreasing), * using GPLOT_LINES would be confusing! */ {GPLOT *gplot; gplot = gplotCreate("search_output", GPLOT_PNG, "Binary search. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ cleanup: pixDestroy(&pixsw); pixDestroy(&pixsch); pixDestroy(&pixt1); pixDestroy(&pixt2); numaDestroy(&nascore); numaDestroy(&natheta); return ret; }
/*! * \brief pixFindSkewSweep() * * \param[in] pixs 1 bpp * \param[out] pangle angle required to deskew, in degrees * \param[in] reduction factor = 1, 2, 4 or 8 * \param[in] sweeprange half the full range; assumed about 0; in degrees * \param[in] sweepdelta angle increment of sweep; in degrees * \return 0 if OK, 1 on error or if angle measurment not valid * * <pre> * Notes: * (1) This examines the 'score' for skew angles with equal intervals. * (2) Caller must check the return value for validity of the result. * </pre> */ l_int32 pixFindSkewSweep(PIX *pixs, l_float32 *pangle, l_int32 reduction, l_float32 sweeprange, l_float32 sweepdelta) { l_int32 ret, bzero, i, nangles; l_float32 deg2rad, theta; l_float32 sum, maxscore, maxangle; NUMA *natheta, *nascore; PIX *pix, *pixt; PROCNAME("pixFindSkewSweep"); if (!pangle) return ERROR_INT("&angle not defined", procName, 1); *pangle = 0.0; if (!pixs) return ERROR_INT("pixs not defined", procName, 1); if (pixGetDepth(pixs) != 1) return ERROR_INT("pixs not 1 bpp", procName, 1); if (reduction != 1 && reduction != 2 && reduction != 4 && reduction != 8) return ERROR_INT("reduction must be in {1,2,4,8}", procName, 1); deg2rad = 3.1415926535 / 180.; ret = 0; /* Generate reduced image, if requested */ if (reduction == 1) pix = pixClone(pixs); else if (reduction == 2) pix = pixReduceRankBinaryCascade(pixs, 1, 0, 0, 0); else if (reduction == 4) pix = pixReduceRankBinaryCascade(pixs, 1, 1, 0, 0); else /* reduction == 8 */ pix = pixReduceRankBinaryCascade(pixs, 1, 1, 2, 0); pixZero(pix, &bzero); if (bzero) { pixDestroy(&pix); return 1; } nangles = (l_int32)((2. * sweeprange) / sweepdelta + 1); natheta = numaCreate(nangles); nascore = numaCreate(nangles); pixt = pixCreateTemplate(pix); if (!pix || !pixt) { ret = ERROR_INT("pix and pixt not both made", procName, 1); goto cleanup; } if (!natheta || !nascore) { ret = ERROR_INT("natheta and nascore not both made", procName, 1); goto cleanup; } for (i = 0; i < nangles; i++) { theta = -sweeprange + i * sweepdelta; /* degrees */ /* Shear pix about the UL corner and put the result in pixt */ pixVShearCorner(pixt, pix, deg2rad * theta, L_BRING_IN_WHITE); /* Get score */ pixFindDifferentialSquareSum(pixt, &sum); #if DEBUG_PRINT_SCORES L_INFO("sum(%7.2f) = %7.0f\n", procName, theta, sum); #endif /* DEBUG_PRINT_SCORES */ /* Save the result in the output arrays */ numaAddNumber(nascore, sum); numaAddNumber(natheta, theta); } /* Find the location of the maximum (i.e., the skew angle) * by fitting the largest data point and its two neighbors * to a quadratic, using lagrangian interpolation. */ numaFitMax(nascore, &maxscore, natheta, &maxangle); *pangle = maxangle; #if DEBUG_PRINT_SWEEP L_INFO(" From sweep: angle = %7.3f, score = %7.3f\n", procName, maxangle, maxscore); #endif /* DEBUG_PRINT_SWEEP */ #if DEBUG_PLOT_SCORES /* Plot the result -- the scores versus rotation angle -- * using gnuplot with GPLOT_LINES (lines connecting data points). * The GPLOT data structure is first created, with the * appropriate data incorporated from the two input NUMAs, * and then the function gplotMakeOutput() uses gnuplot to * generate the output plot. This can be either a .png file * or a .ps file, depending on whether you use GPLOT_PNG * or GPLOT_PS. */ {GPLOT *gplot; gplot = gplotCreate("sweep_output", GPLOT_PNG, "Sweep. Variance of difference of ON pixels vs. angle", "angle (deg)", "score"); gplotAddPlot(gplot, natheta, nascore, GPLOT_LINES, "plot1"); gplotAddPlot(gplot, natheta, nascore, GPLOT_POINTS, "plot2"); gplotMakeOutput(gplot); gplotDestroy(&gplot); } #endif /* DEBUG_PLOT_SCORES */ cleanup: pixDestroy(&pix); pixDestroy(&pixt); numaDestroy(&nascore); numaDestroy(&natheta); return ret; }