/*!
 *  l_dnaConvertToNuma()
 *
 *      Input:  da
 *      Return: na, or null on error
 *
 *  Notes:
 *      (1) Every value in da is read with l_dnaGetDValue() and
 *          appended, in order, to a newly created numa.
 */
NUMA *
l_dnaConvertToNuma(L_DNA  *da)
{
l_int32    idx, count;
l_float64  dval;
NUMA      *nad;

    PROCNAME("l_dnaConvertToNuma");

    if (da == NULL)
        return (NUMA *)ERROR_PTR("da not defined", procName, NULL);

        /* Size the destination to hold all source values */
    count = l_dnaGetCount(da);
    nad = numaCreate(count);

    idx = 0;
    while (idx < count) {
        l_dnaGetDValue(da, idx, &dval);
        numaAddNumber(nad, dval);
        idx++;
    }

    return nad;
}

/*!
 * \brief   numaCreateFromIArray()
 *
 * \param[in]    iarray    integer array
 * \param[in]    size      number of elements in the array
 * \return  na, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The int array cannot be inserted directly into the numa,
 *          because a numa takes a float array.  The values are
 *          therefore copied one at a time into a new numa, and the
 *          input array continues to be owned by the caller.
 * </pre>
 */
NUMA *
numaCreateFromIArray(l_int32  *iarray,
                     l_int32   size)
{
l_int32  remaining;
l_int32 *src;
NUMA    *nad;

    PROCNAME("numaCreateFromIArray");

    if (iarray == NULL)
        return (NUMA *)ERROR_PTR("iarray not defined", procName, NULL);
    if (size < 1)
        return (NUMA *)ERROR_PTR("size must be > 0", procName, NULL);

    nad = numaCreate(size);

        /* Walk the input with a pointer, appending each value in order */
    src = iarray;
    for (remaining = size; remaining > 0; remaining--)
        numaAddNumber(nad, *src++);

    return nad;
}

/*!
 *  ptaGetSortIndex()
 *
 *      Input:  ptas
 *              sorttype (L_SORT_BY_X, L_SORT_BY_Y)
 *              sortorder  (L_SORT_INCREASING, L_SORT_DECREASING)
 *              &naindex (<return> index of sorted order into
 *                        original array)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The selected coordinate (x or y) of each point is copied
 *          into a temporary numa, and numaGetSortIndex() is applied
 *          to that numa.  The temporary numa is destroyed before
 *          returning.
 */
l_int32
ptaGetSortIndex(PTA     *ptas,
                l_int32  sorttype,
                l_int32  sortorder,
                NUMA   **pnaindex)
{
l_int32    k, npts, usex;
l_float32  xval, yval;
NUMA      *nakeys;

    PROCNAME("ptaGetSortIndex");

    if (pnaindex == NULL)
        return ERROR_INT("&naindex not defined", procName, 1);
    *pnaindex = NULL;
    if (ptas == NULL)
        return ERROR_INT("ptas not defined", procName, 1);
    if (!(sorttype == L_SORT_BY_X || sorttype == L_SORT_BY_Y))
        return ERROR_INT("invalid sort type", procName, 1);
    if (!(sortorder == L_SORT_INCREASING || sortorder == L_SORT_DECREASING))
        return ERROR_INT("invalid sort order", procName, 1);

        /* Collect the sort key (x or y) for every point */
    npts = ptaGetCount(ptas);
    nakeys = numaCreate(npts);
    if (nakeys == NULL)
        return ERROR_INT("na not made", procName, 1);
    usex = (sorttype == L_SORT_BY_X);
    k = 0;
    while (k < npts) {
        ptaGetPt(ptas, k, &xval, &yval);
        numaAddNumber(nakeys, usex ? xval : yval);
        k++;
    }

        /* Compute the sort index from the keys; the keys are then
         * no longer needed */
    *pnaindex = numaGetSortIndex(nakeys, sortorder);
    numaDestroy(&nakeys);
    if (*pnaindex == NULL)
        return ERROR_INT("naindex not made", procName, 1);

    return 0;
}

/*!
 *  boxaExtractSortedPattern()
 *
 *      Input:  boxa (typ. of word bounding boxes, in textline order)
 *              numa (index of textline for each box in boxa)
 *      Return: naa (numaa, where each numa represents one textline),
 *                   or null on error
 *
 *  Notes:
 *      (1) The input is expected to come from pixGetWordBoxesInTextlines().
 *      (2) Each numa in the output consists of an average y coordinate
 *          of the first box in the textline, followed by pairs of
 *          x coordinates representing the left and right edges of each
 *          of the boxes in the textline.
 *      (3) A new textline numa is started each time the row index
 *          exceeds the row index of the current textline.  Boxes that
 *          are seen before any textline has been started (which
 *          requires a negative row index at the start of the input)
 *          are skipped, rather than being written through an
 *          uninitialized numa pointer.
 */
NUMAA *
boxaExtractSortedPattern(BOXA  *boxa,
                         NUMA  *na)
{
l_int32  index, nbox, row, prevrow, x, y, w, h;
BOX     *box;
NUMA    *nad;
NUMAA   *naa;

    PROCNAME("boxaExtractSortedPattern");

    if (!boxa)
        return (NUMAA *) ERROR_PTR("boxa not defined", procName, NULL);
    if (!na)
        return (NUMAA *) ERROR_PTR("na not defined", procName, NULL);

    naa = numaaCreate(0);
    nbox = boxaGetCount(boxa);
    if (nbox == 0)
        return naa;

    nad = NULL;  /* no textline has been started yet */
    prevrow = -1;
    for (index = 0; index < nbox; index++) {
        box = boxaGetBox(boxa, index, L_CLONE);
        numaGetIValue(na, index, &row);
        if (row > prevrow) {
                /* Finish the previous textline, if any, and start a
                 * new one that begins with the mid-height y value
                 * of its first box. */
            if (nad)
                numaaAddNuma(naa, nad, L_INSERT);
            nad = numaCreate(0);
            prevrow = row;
            boxGetGeometry(box, NULL, &y, NULL, &h);
            numaAddNumber(nad, y + h / 2);
        }
        if (nad) {
                /* Left and right edges of this box */
            boxGetGeometry(box, &x, NULL, &w, NULL);
            numaAddNumber(nad, x);
            numaAddNumber(nad, x + w - 1);
        }
        boxDestroy(&box);
    }

        /* Add the final textline */
    if (nad)
        numaaAddNuma(naa, nad, L_INSERT);

    return naa;
}

/*!
 *  numaCopy()
 *
 *      Input:  na
 *      Return: copy of numa, or null on error
 *
 *  Notes:
 *      (1) The copy is created with the same allocation size as the
 *          source, takes the same startx and delx fields, and
 *          receives the source values in order.
 */
NUMA *
numaCopy(NUMA  *na)
{
l_int32  k;
NUMA    *nacopy;

    PROCNAME("numaCopy");

    if (na == NULL)
        return (NUMA *)ERROR_PTR("na not defined", procName, NULL);

    nacopy = numaCreate(na->nalloc);
    if (nacopy == NULL)
        return (NUMA *)ERROR_PTR("cna not made", procName, NULL);

        /* Carry over the x-axis parameters */
    nacopy->startx = na->startx;
    nacopy->delx = na->delx;

        /* Append the stored values one by one */
    k = 0;
    while (k < na->n) {
        numaAddNumber(nacopy, na->array[k]);
        k++;
    }

    return nacopy;
}

/*
 *  DisplayMapRGBHistogram()
 *
 *      Input:  m (map; the nodes are read as a key (utype) giving a
 *                 color and a value (itype) giving a count)
 *              rootname (root of the plot filename; ".png" is appended
 *                        to form the file that is displayed)
 *      Return: void
 *
 *  Notes:
 *      (1) Traverses the map from first to last node, accumulating the
 *          number of colors, the total count, and the color with the
 *          largest count.  The per-color counts are plotted with
 *          gplotSimple1() and the resulting png is displayed.
 *      (2) As a consistency check, the count for the most populated
 *          color is looked up again with amapGetCountForColor() and
 *          compared with the value found during the traversal.
 */
static void
DisplayMapRGBHistogram(L_AMAP      *m,
                       const char  *rootname)
{
char          buf[128];
l_int32       ncolors, npix, ival, maxn, maxn2;
l_uint32      maxcolor;
L_AMAP_NODE  *n;
NUMA         *na;

    fprintf(stderr, "\n --------------- Display RGB histogram ------------\n");
    na = numaCreate(0);
    ncolors = npix = 0;
    maxn = 0;
    maxcolor = 0;
    n = l_amapGetFirst(m);
    while (n) {
        ncolors++;
        ival = n->value.itype;
        if (ival > maxn) {  /* track the most populated color */
            maxn = ival;
            maxcolor = n->key.utype;
        }
        numaAddNumber(na, ival);
        npix += ival;
        n = l_amapGetNext(n);
    }
    fprintf(stderr, " Num colors = %d, Num pixels = %d\n", ncolors, npix);
    fprintf(stderr, " Color %x has count %d\n", maxcolor, maxn);

        /* The looked-up count must agree with the traversal.  The
         * arguments are given in the order the message labels them:
         * maxn2 first, then the expected value maxn. */
    maxn2 = amapGetCountForColor(m, maxcolor);
    if (maxn != maxn2)
        fprintf(stderr, " Error: maxn2 = %d; not equal to %d\n", maxn2, maxn);

    gplotSimple1(na, GPLOT_PNG, rootname, NULL);
    snprintf(buf, sizeof(buf), "%s.png", rootname);
    l_fileDisplay(buf, 1400, 0, 1.0);
    numaDestroy(&na);
}

/*! * numaCreateFromString() * * Input: string (of comma-separated numbers) * Return: na, or null on error * * Notes: * (1) The numbers can be ints or floats; they will be interpreted * and stored as floats. To use them as integers (e.g., for * indexing into arrays), use numaGetIValue(...). */ NUMA * numaCreateFromString(const char *str) { char *substr; l_int32 i, n, nerrors; l_float32 val; NUMA *na; SARRAY *sa; PROCNAME("numaCreateFromString"); if (!str || (strlen(str) == 0)) return (NUMA *)ERROR_PTR("str not defined or empty", procName, NULL); sa = sarrayCreate(0); sarraySplitString(sa, str, ","); n = sarrayGetCount(sa); na = numaCreate(n); nerrors = 0; for (i = 0; i < n; i++) { substr = sarrayGetString(sa, i, L_NOCOPY); if (sscanf(substr, "%f", &val) != 1) { L_ERROR("substr %d not float\n", procName, i); nerrors++; } else { numaAddNumber(na, val); } } sarrayDestroy(&sa); if (nerrors > 0) { numaDestroy(&na); return (NUMA *)ERROR_PTR("non-floats in string", procName, NULL); } return na; }

/*! * \brief numaReadStream() * * \param[in] fp file stream * \return numa, or NULL on error */ NUMA * numaReadStream(FILE *fp) { l_int32 i, n, index, ret, version; l_float32 val, startx, delx; NUMA *na; PROCNAME("numaReadStream"); if (!fp) return (NUMA *)ERROR_PTR("stream not defined", procName, NULL); ret = fscanf(fp, "\nNuma Version %d\n", &version); if (ret != 1) return (NUMA *)ERROR_PTR("not a numa file", procName, NULL); if (version != NUMA_VERSION_NUMBER) return (NUMA *)ERROR_PTR("invalid numa version", procName, NULL); if (fscanf(fp, "Number of numbers = %d\n", &n) != 1) return (NUMA *)ERROR_PTR("invalid number of numbers", procName, NULL); if ((na = numaCreate(n)) == NULL) return (NUMA *)ERROR_PTR("na not made", procName, NULL); for (i = 0; i < n; i++) { if (fscanf(fp, " [%d] = %f\n", &index, &val) != 2) { numaDestroy(&na); return (NUMA *)ERROR_PTR("bad input data", procName, NULL); } numaAddNumber(na, val); } /* Optional data */ if (fscanf(fp, "startx = %f, delx = %f\n", &startx, &delx) == 2) numaSetParameters(na, startx, delx); return na; }

/*!
 *  numa2dAddNumber()
 *
 *      Input:  na2d
 *              row of 2d array
 *              col of 2d array
 *              val  (float or int to be added; stored as a float)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The numa at (row, col) is created on first use, with the
 *          initial size stored in the na2d.
 */
l_int32
numa2dAddNumber(NUMA2D    *na2d,
                l_int32    row,
                l_int32    col,
                l_float32  val)
{
NUMA  *nacell;

    PROCNAME("numa2dAddNumber");

    if (na2d == NULL)
        return ERROR_INT("na2d not defined", procName, 1);
    if (!(row >= 0 && row < na2d->nrows))
        return ERROR_INT("row out of bounds", procName, 1);
    if (!(col >= 0 && col < na2d->ncols))
        return ERROR_INT("col out of bounds", procName, 1);

        /* Make the numa for this cell if it does not exist yet */
    nacell = na2d->numa[row][col];
    if (nacell == NULL) {
        nacell = numaCreate(na2d->initsize);
        na2d->numa[row][col] = nacell;
    }

    numaAddNumber(nacell, val);
    return 0;
}

/*!
 *  numaHashAdd()
 *
 *      Input:  nahash
 *              key  (key to be hashed into a bucket number)
 *              value  (float value to be appended to the specific numa)
 *      Return: 0 if OK; 1 on error
 *
 *  Notes:
 *      (1) The bucket is (key mod nbuckets).  The numa for a bucket
 *          is created on first use, with the initial size stored
 *          in the nahash.
 *      (2) The key is unsigned, so there is no negative-key case
 *          to reject.
 */
l_int32
numaHashAdd(NUMAHASH  *nahash,
            l_uint32   key,
            l_float32  value)
{
l_int32  bucket;
NUMA    *na;

    PROCNAME("numaHashAdd");

    if (!nahash)
        return ERROR_INT("nahash not defined", procName, 1);

    bucket = key % nahash->nbuckets;
    na = nahash->numa[bucket];
    if (!na) {  /* first value for this bucket */
        if ((na = numaCreate(nahash->initsize)) == NULL)
            return ERROR_INT("na not made", procName, 1);
        nahash->numa[bucket] = na;
    }
    numaAddNumber(na, value);
    return 0;
}

/*! * \brief jbWordsInTextlines() * * \param[in] dirin directory of input pages * \param[in] reduction 1 for full res; 2 for half-res * \param[in] maxwidth of word mask components, to be kept * \param[in] maxheight of word mask components, to be kept * \param[in] thresh on correlation; 0.80 is reasonable * \param[in] weight for handling thick text; 0.6 is reasonable * \param[out] pnatl numa with textline index for each component * \param[in] firstpage 0-based * \param[in] npages use 0 for all pages in dirin * \return classer for the set of pages * * <pre> * Notes: * (1) This is a high-level function. See prog/jbwords for example * of usage. * (2) Typically, words can be found reasonably well at a resolution * of about 150 ppi. For highest accuracy, you should use 300 ppi. * Assuming that the input images are 300 ppi, use reduction = 1 * for finding words at full res, and reduction = 2 for finding * them at 150 ppi. * </pre> */ JBCLASSER * jbWordsInTextlines(const char *dirin, l_int32 reduction, l_int32 maxwidth, l_int32 maxheight, l_float32 thresh, l_float32 weight, NUMA **pnatl, l_int32 firstpage, l_int32 npages) { char *fname; l_int32 nfiles, i, w, h; BOXA *boxa; JBCLASSER *classer; NUMA *nai, *natl; PIX *pix; PIXA *pixa; SARRAY *safiles; PROCNAME("jbWordsInTextlines"); if (!pnatl) return (JBCLASSER *)ERROR_PTR("&natl not defined", procName, NULL); *pnatl = NULL; if (!dirin) return (JBCLASSER *)ERROR_PTR("dirin not defined", procName, NULL); if (reduction != 1 && reduction != 2) return (JBCLASSER *)ERROR_PTR("reduction not in {1,2}", procName, NULL); safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); /* Classify components */ classer = jbCorrelationInit(JB_WORDS, maxwidth, maxheight, thresh, weight); classer->safiles = sarrayCopy(safiles); natl = numaCreate(0); *pnatl = natl; for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, L_NOCOPY); if ((pix = pixRead(fname)) == NULL) { L_WARNING("image 
file %d not read\n", procName, i); continue; } pixGetDimensions(pix, &w, &h, NULL); if (reduction == 1) { classer->w = w; classer->h = h; } else { /* reduction == 2 */ classer->w = w / 2; classer->h = h / 2; } pixGetWordsInTextlines(pix, reduction, JB_WORDS_MIN_WIDTH, JB_WORDS_MIN_HEIGHT, maxwidth, maxheight, &boxa, &pixa, &nai); jbAddPageComponents(classer, pix, boxa, pixa); numaJoin(natl, nai, 0, -1); pixDestroy(&pix); numaDestroy(&nai); boxaDestroy(&boxa); pixaDestroy(&pixa); } sarrayDestroy(&safiles); return classer; }

/*
 *  Regression test for pixMinMaxNearLine().
 *
 *  Reads "feyn.tif", scales it with pixScaleToGray6(), and then:
 *    - measures the average min and max values near a set of horizontal
 *      lines, and near the corresponding vertical lines in an
 *      orthogonally rotated image, and checks that the two sets of
 *      results are similar (checks 0 - 1);
 *    - writes the resulting numas to file and checks them (checks 2 - 6);
 *    - plots the average minimums and checks the plot (check 7);
 *    - extracts the minima along a single line at three different
 *      distances, checks one of them (check 8), and plots uniformly
 *      sampled versions of all three (check 9).
 *
 *  The trailing numbered comments give the index of each regression check.
 */
l_int32 main(int argc, char **argv)
{
l_int32       ret, i, n, similar, x1, y1, val1, val2, val3, val4;
l_float32     minave, minave2, maxave, fract;
NUMA         *na1, *na2, *na3, *na4, *na5, *na6;
NUMAA        *naa;
PIX          *pixs, *pix1, *pix2, *pix3, *pix4;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    pixs = pixRead("feyn.tif");
    pix1 = pixScaleToGray6(pixs);
    pixDisplayWithTitle(pix1, 100, 600, NULL, rp->display);

        /* Find averages of min and max along horizontal lines, at
         * y = 40, 45, ..., 570.  Only lines for which the call returns
         * 0 contribute values: na1 gets the min averages and na3 gets
         * the max averages, both truncated to integers. */
    fprintf(stderr, "Ignore the following 12 error messages:\n");
    na1 = numaCreate(0);
    na3 = numaCreate(0);
    for (y1 = 40; y1 < 575; y1 += 5) {
        ret = pixMinMaxNearLine(pix1, 20, y1, 400, y1, 5, L_SCAN_BOTH,
                                NULL, NULL, &minave, &maxave);
        if (!ret) {
            numaAddNumber(na1, (l_int32)minave);
            numaAddNumber(na3, (l_int32)maxave);
            if (rp->display)
                fprintf(stderr, "y = %d: minave = %d, maxave = %d\n",
                        y1, (l_int32)minave, (l_int32)maxave);
        }
    }

        /* Find averages along vertical lines, at x = 40, 45, ..., 570.
         * We've rotated the image by 90 degrees, so the results should
         * be nearly identical to the first set.  Also generate a
         * single-sided scan (L_SCAN_NEGATIVE) for comparison with the
         * double-sided scans.  Here na2 gets the min averages, na4 the
         * max averages and na5 the single-sided min averages.  Only
         * the return value of the double-sided scan decides whether a
         * line contributes values. */
    pix2 = pixRotateOrth(pix1, 3);
    pixDisplayWithTitle(pix2, 600, 600, NULL, rp->display);
    na2 = numaCreate(0);
    na4 = numaCreate(0);
    na5 = numaCreate(0);
    for (x1 = 40; x1 < 575; x1 += 5) {
        ret = pixMinMaxNearLine(pix2, x1, 20, x1, 400, 5, L_SCAN_BOTH,
                                NULL, NULL, &minave, &maxave);
        pixMinMaxNearLine(pix2, x1, 20, x1, 400, 5, L_SCAN_NEGATIVE,
                          NULL, NULL, &minave2, NULL);
        if (!ret) {
            numaAddNumber(na2, (l_int32)minave);
            numaAddNumber(na4, (l_int32)maxave);
            numaAddNumber(na5, (l_int32)minave2);
            if (rp->display)
                fprintf(stderr,
                        "x = %d: minave = %d, minave2 = %d, maxave = %d\n",
                        x1, (l_int32)minave, (l_int32)minave2,
                        (l_int32)maxave);
        }
    }

        /* Compare the portrait and landscape results, then write out
         * and check all five numas */
    numaSimilar(na1, na2, 3.0, &similar);  /* should be TRUE */
    regTestCompareValues(rp, similar, 1, 0);  /* 0 */
    numaSimilar(na3, na4, 1.0, &similar);  /* should be TRUE */
    regTestCompareValues(rp, similar, 1, 0);  /* 1 */
    numaWrite("/tmp/lept/regout/na1.na", na1);
    numaWrite("/tmp/lept/regout/na2.na", na2);
    numaWrite("/tmp/lept/regout/na3.na", na3);
    numaWrite("/tmp/lept/regout/na4.na", na4);
    numaWrite("/tmp/lept/regout/na5.na", na5);
    regTestCheckFile(rp, "/tmp/lept/regout/na1.na");  /* 2 */
    regTestCheckFile(rp, "/tmp/lept/regout/na2.na");  /* 3 */
    regTestCheckFile(rp, "/tmp/lept/regout/na3.na");  /* 4 */
    regTestCheckFile(rp, "/tmp/lept/regout/na4.na");  /* 5 */
    regTestCheckFile(rp, "/tmp/lept/regout/na5.na");  /* 6 */

        /* Plot the average minimums for the 3 cases.
         * NOTE(review): na1, na2 and na5 are inserted with L_INSERT and
         * are never destroyed individually below, so naa presumably
         * takes ownership of them -- confirm against numaaAddNuma(). */
    naa = numaaCreate(3);
    numaaAddNuma(naa, na1, L_INSERT);  /* portrait, double-sided */
    numaaAddNuma(naa, na2, L_INSERT);  /* landscape, double-sided */
    numaaAddNuma(naa, na5, L_INSERT);  /* landscape, single-sided */
    gplotSimpleN(naa, GPLOT_PNG, "/tmp/lept/regout/nearline",
                 "Average minimums along lines");
        /* Disabled: optional one second pause before reading the plot */
#if 0
#ifndef  _WIN32
    sleep(1);
#else
    Sleep(1000);
#endif  /* _WIN32 */
#endif
    pix3 = pixRead("/tmp/lept/regout/nearline.png");
    regTestWritePixAndCheck(rp, pix3, IFF_PNG);  /* 7 */
    pixDisplayWithTitle(pix3, 100, 100, NULL, rp->display);

        /* In display mode, print the portrait/landscape differences
         * value by value.  This must happen before naa is destroyed,
         * because na1 and na2 are still read here. */
    if (rp->display) {
        n = numaGetCount(na3);
        for (i = 0; i < n; i++) {
            numaGetIValue(na1, i, &val1);
            numaGetIValue(na2, i, &val2);
            numaGetIValue(na3, i, &val3);
            numaGetIValue(na4, i, &val4);
            fprintf(stderr, "val1 = %d, val2 = %d, diff = %d; "
                            "val3 = %d, val4 = %d, diff = %d\n",
                    val1, val2, L_ABS(val1 - val2),
                    val3, val4, L_ABS(val3 - val4));
        }
    }

    numaaDestroy(&naa);
    numaDestroy(&na3);
    numaDestroy(&na4);

        /* Plot minima along a single line, with different distances
         * (2, 5 and 15).  The pointers na1, na2 and na3 are reused
         * here to receive the numas returned for each distance. */
    pixMinMaxNearLine(pix1, 20, 200, 400, 200, 2, L_SCAN_BOTH,
                      &na1, NULL, NULL, NULL);
    pixMinMaxNearLine(pix1, 20, 200, 400, 200, 5, L_SCAN_BOTH,
                      &na2, NULL, NULL, NULL);
    pixMinMaxNearLine(pix1, 20, 200, 400, 200, 15, L_SCAN_BOTH,
                      &na3, NULL, NULL, NULL);
    numaWrite("/tmp/lept/regout/na6.na", na1);
    regTestCheckFile(rp, "/tmp/lept/regout/na6.na");  /* 8 */

        /* Transform each numa using the factor 100 / n, where n is
         * the count of na1, then resample each result to 100 values.
         * The untransformed numas are destroyed and their pointers are
         * reused for the resampled versions. */
    n = numaGetCount(na1);
    fract = 100.0 / n;
    na4 = numaTransform(na1, 0.0, fract);
    na5 = numaTransform(na2, 0.0, fract);
    na6 = numaTransform(na3, 0.0, fract);
    numaDestroy(&na1);
    numaDestroy(&na2);
    numaDestroy(&na3);
    na1 = numaUniformSampling(na4, 100);
    na2 = numaUniformSampling(na5, 100);
    na3 = numaUniformSampling(na6, 100);
    naa = numaaCreate(3);
    numaaAddNuma(naa, na1, L_INSERT);
    numaaAddNuma(naa, na2, L_INSERT);
    numaaAddNuma(naa, na3, L_INSERT);
    gplotSimpleN(naa, GPLOT_PNG, "/tmp/lept/regout/nearline2",
                 "Min along line");
    pix4 = pixRead("/tmp/lept/regout/nearline2.png");
    regTestWritePixAndCheck(rp, pix4, IFF_PNG);  /* 9 */
    pixDisplayWithTitle(pix4, 800, 100, NULL, rp->display);

        /* Clean up; na1, na2 and na3 were inserted into naa above */
    numaaDestroy(&naa);
    numaDestroy(&na4);
    numaDestroy(&na5);
    numaDestroy(&na6);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pixs);
    return regTestCleanup(rp);
}

/*
 *  colorspacetest
 *
 *      Syntax:  colorspacetest filein
 *
 *  Exercises several color operations on the input image, in order:
 *    (1) generates a strip of 19 colors by sampling hue,
 *    (2) converts the image from RGB to HSV and back,
 *    (3) does the same conversions on the colormap of a quantized
 *        version of the image,
 *    (4) extracts the color content components,
 *    (5) measures, for 20 white point settings and 6 thresholds,
 *        the fraction of pixels counted after thresholding and
 *        inverting the color magnitude, using two different color
 *        magnitude types, and plots the results.
 *
 *  Returns 0 if OK; 1 if the syntax is wrong or the input image
 *  cannot be read.
 */
int main(int argc, char **argv)
{
char      label[512];
l_int32   rval, gval, bval, w, h, i, j, rwhite, gwhite, bwhite, count;
l_uint32  pixel;
GPLOT    *gplot1, *gplot2;
NUMA     *naseq, *na;
NUMAA    *naa1, *naa2;
PIX      *pixs, *pixt, *pixt0, *pixt1, *pixt2;
PIX      *pixr, *pixg, *pixb;
PIXA     *pixa;
PIXCMAP  *cmap;
static char  mainName[] = "colorspacetest";

    if (argc != 2)
        return ERROR_INT(" Syntax: colorspacetest filein", mainName, 1);

    if ((pixs = pixRead(argv[1])) == NULL)
        return ERROR_INT("pixs not made", mainName, 1);

        /* Generate colors by sampling hue with max sat and value.
         * This was used to make the color strip 19-colors.png.
         * The hue argument steps from 0 to 240 over the 19 samples,
         * and each sample fills one 50 x 100 pix at 32 bpp. */
    pixa = pixaCreate(19);
    for (i = 0; i < 19; i++) {
        convertHSVToRGB((240 * i / 18), 255, 255, &rval, &gval, &bval);
        composeRGBPixel(rval, gval, bval, &pixel);
        pixt1 = pixCreate(50, 100, 32);
        pixSetAllArbitrary(pixt1, pixel);
        pixaAddPix(pixa, pixt1, L_INSERT);
    }
    pixt2 = pixaDisplayTiledInRows(pixa, 32, 1100, 1.0, 0, 0, 0);
    pixDisplayWrite(pixt2, 1);
    pixDestroy(&pixt2);
    pixaDestroy(&pixa);

        /* Colorspace conversion in rgb: convert to HSV in a new pix,
         * then convert that pix back to RGB in-place, writing the
         * image out at each stage. */
    pixDisplayWrite(pixs, 1);
    pixt = pixConvertRGBToHSV(NULL, pixs);
    pixDisplayWrite(pixt, 1);
    pixConvertHSVToRGB(pixt, pixt);
    pixDisplayWrite(pixt, 1);
    pixDestroy(&pixt);

        /* Colorspace conversion on a colormap: the colormap of the
         * quantized image is converted to HSV and back, and is
         * written to stderr after each step. */
    pixt = pixOctreeQuantNumColors(pixs, 25, 0);
    pixDisplayWrite(pixt, 1);
    cmap = pixGetColormap(pixt);
    pixcmapWriteStream(stderr, cmap);
    pixcmapConvertRGBToHSV(cmap);
    pixcmapWriteStream(stderr, cmap);
    pixDisplayWrite(pixt, 1);
    pixcmapConvertHSVToRGB(cmap);
    pixcmapWriteStream(stderr, cmap);
    pixDisplayWrite(pixt, 1);
    pixDestroy(&pixt);

        /* Color content extraction */
    pixColorContent(pixs, 0, 0, 0, 0, &pixr, &pixg, &pixb);
    pixDisplayWrite(pixr, 1);
    pixDisplayWrite(pixg, 1);
    pixDisplayWrite(pixb, 1);
    pixDestroy(&pixr);
    pixDestroy(&pixg);
    pixDestroy(&pixb);

        /* Color content measurement.  naa1 and naa2 each hold 6 numas,
         * one per threshold.  Each new numa is added as a copy to naa1
         * and is itself inserted into naa2.  naseq is used below as
         * the x data for every plot. */
    pixa = pixaCreate(20);
    naseq = numaMakeSequence(100, 5, 20);
    naa1 = numaaCreate(6);
    naa2 = numaaCreate(6);
    for (i = 0; i < 6; i++) {
        na = numaCreate(20);
        numaaAddNuma(naa1, na, L_COPY);
        numaaAddNuma(naa2, na, L_INSERT);
    }
    pixGetDimensions(pixs, &w, &h, NULL);
    for (i = 0; i < 20; i++) {
            /* White point for this step: red rises from 100 in steps
             * of 5, green falls from 200 in steps of 5, blue is fixed */
        rwhite = 100 + 5 * i;
        gwhite = 200 - 5 * i;
        bwhite = 150;
        pixt0 = pixGlobalNormRGB(NULL, pixs, rwhite, gwhite, bwhite, 255);
        pixaAddPix(pixa, pixt0, L_INSERT);

            /* First color magnitude type: for each threshold
             * (30, 40, ..., 80), store the counted pixels as a
             * fraction of the image area in naa1. */
        pixt1 = pixColorMagnitude(pixs, rwhite, gwhite, bwhite,
                                  L_MAX_DIFF_FROM_AVERAGE_2);
        for (j = 0; j < 6; j++) {
            pixt2 = pixThresholdToBinary(pixt1, 30 + 10 * j);
            pixInvert(pixt2, pixt2);
            pixCountPixels(pixt2, &count, NULL);
            na = numaaGetNuma(naa1, j, L_CLONE);
            numaAddNumber(na, (l_float32)count / (l_float32)(w * h));
            numaDestroy(&na);  /* releases the clone handle */
            pixDestroy(&pixt2);
        }
        pixDestroy(&pixt1);

            /* Second color magnitude type: same measurement, stored
             * in naa2. */
        pixt1 = pixColorMagnitude(pixs, rwhite, gwhite, bwhite,
                                  L_MAX_MIN_DIFF_FROM_2);
        for (j = 0; j < 6; j++) {
            pixt2 = pixThresholdToBinary(pixt1, 30 + 10 * j);
            pixInvert(pixt2, pixt2);
            pixCountPixels(pixt2, &count, NULL);
            na = numaaGetNuma(naa2, j, L_CLONE);
            numaAddNumber(na, (l_float32)count / (l_float32)(w * h));
            numaDestroy(&na);  /* releases the clone handle */
            pixDestroy(&pixt2);
        }
        pixDestroy(&pixt1);
    }

        /* One plot per color magnitude type, with one curve per
         * threshold.  The same label is used for the corresponding
         * curve in both plots. */
    gplot1 = gplotCreate("/tmp/junkplot1", GPLOT_X11,
                         "Fraction with given color (diff from average)",
                         "white point space for red", "amount of color");
    gplot2 = gplotCreate("/tmp/junkplot2", GPLOT_X11,
                         "Fraction with given color (min diff)",
                         "white point space for red", "amount of color");
    for (j = 0; j < 6; j++) {
        na = numaaGetNuma(naa1, j, L_CLONE);
        sprintf(label, "thresh %d", 30 + 10 * j);
        gplotAddPlot(gplot1, naseq, na, GPLOT_LINES, label);
        numaDestroy(&na);
        na = numaaGetNuma(naa2, j, L_CLONE);
        gplotAddPlot(gplot2, naseq, na, GPLOT_LINES, label);
        numaDestroy(&na);
    }
    gplotMakeOutput(gplot1);
    gplotMakeOutput(gplot2);
    gplotDestroy(&gplot1);
    gplotDestroy(&gplot2);

        /* Tile the 20 normalized images, then write and display
         * the result */
    pixt1 = pixaDisplayTiledAndScaled(pixa, 32, 250, 4, 0, 10, 2);
    pixWrite("/tmp/junkcolormag", pixt1, IFF_PNG);
    pixDisplayWithTitle(pixt1, 0, 100, "Color magnitude", 1);
    pixDestroy(&pixt1);
    pixaDestroy(&pixa);
    numaDestroy(&naseq);
    numaaDestroy(&naa1);
    numaaDestroy(&naa2);

        /* NOTE(review): presumably shows the files written by the
         * pixDisplayWrite() calls above -- confirm the output path */
    pixDisplayMultiple("/tmp/display/file*");

    pixDestroy(&pixs);
    return 0;
}

/*! * pixaBinSort() * * Input: pixas * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, * L_SORT_BY_HEIGHT, L_SORT_BY_PERIMETER) * sortorder (L_SORT_INCREASING, L_SORT_DECREASING) * &naindex (<optional return> index of sorted order into * original array) * copyflag (L_COPY, L_CLONE) * Return: pixad (sorted version of pixas), or null on error * * Notes: * (1) This sorts based on the data in the boxa. If the boxa * count is not the same as the pixa count, this returns an error. * (2) The copyflag refers to the pix and box copies that are * inserted into the sorted pixa. These are either L_COPY * or L_CLONE. * (3) For a large number of boxes (say, greater than 1000), this * O(n) binsort is much faster than the O(nlogn) shellsort. * For 5000 components, this is over 20x faster than boxaSort(). * (4) Consequently, pixaSort() calls this function if it will * likely go much faster. */ PIXA * pixaBinSort(PIXA *pixas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex, l_int32 copyflag) { l_int32 i, n, x, y, w, h; BOXA *boxa; NUMA *na, *naindex; PIXA *pixad; PROCNAME("pixaBinSort"); if (pnaindex) *pnaindex = NULL; if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && sorttype != L_SORT_BY_PERIMETER) return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL); if (copyflag != L_COPY && copyflag != L_CLONE) return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL); /* Verify that the pixa and its boxa have the same count */ if ((boxa = pixas->boxa) == NULL) /* not owned; do not destroy */ return (PIXA *)ERROR_PTR("boxa not found", procName, NULL); n = pixaGetCount(pixas); if (boxaGetCount(boxa) != n) return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL); /* Generate Numa of 
appropriate box dimensions */ if ((na = numaCreate(n)) == NULL) return (PIXA *)ERROR_PTR("na not made", procName, NULL); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); switch (sorttype) { case L_SORT_BY_X: numaAddNumber(na, x); break; case L_SORT_BY_Y: numaAddNumber(na, y); break; case L_SORT_BY_WIDTH: numaAddNumber(na, w); break; case L_SORT_BY_HEIGHT: numaAddNumber(na, h); break; case L_SORT_BY_PERIMETER: numaAddNumber(na, w + h); break; default: L_WARNING("invalid sort type", procName); } } /* Get the sort index for data array */ if ((naindex = numaGetBinSortIndex(na, sortorder)) == NULL) return (PIXA *)ERROR_PTR("naindex not made", procName, NULL); /* Build up sorted pixa using sort index */ if ((pixad = pixaSortByIndex(pixas, naindex, copyflag)) == NULL) return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); if (pnaindex) *pnaindex = naindex; else numaDestroy(&naindex); numaDestroy(&na); return pixad; }

/*
 * main() -- regression test program for numa (number array) operations.
 *
 * After regTestSetup() succeeds and the "lept/numa1" output directory is
 * made, the test runs five sections in this order:
 *     - Histograms
 *     - Interpolation
 *     - Integration and differentiation
 *     - Rank extraction
 *     - Numa-morphology
 * Each section generates plots under /tmp/lept/numa1/, reads the
 * resulting png files back as pix, and passes them to
 * regTestWritePixAndCheck().  The histogram section also checks scalar
 * statistics with regTestCompareValues().  When rp->display is set, the
 * pix of each section are tiled and displayed.
 *
 * The trailing numbered comments on the check calls run from 0 to 26;
 * presumably they are the indices assigned by the regression framework
 * in call order, so the checks should not be reordered -- TODO confirm.
 *
 * Input files, opened by relative path: test8.jpg, testangle.na,
 * testscore.na and lyra.5.na.
 *
 * Return: 1 if regTestSetup() fails; otherwise the value returned by
 *         regTestCleanup().
 *
 * NOTE(review): the plot written to "int7" is computed with
 *     L_LINEAR_INTERP but is labeled "quadratic".
 * NOTE(review): in the morphology section the pixa is created with
 *     pixaCreate(2) and then receives 5 pix; presumably the pixa
 *     grows as needed -- confirm.
 * NOTE(review): the last pixaDestroy(&pixa) follows an earlier
 *     pixaDestroy(&pixa) with no pixa created in between, so it
 *     looks redundant -- confirm.
 */
int main(int argc, char **argv) { l_int32 i, n, binsize, binstart, nbins; l_float32 pi, val, angle, xval, yval, x0, y0, startval, fbinsize; l_float32 minval, maxval, meanval, median, variance, rankval, rank, rmsdev; GPLOT *gplot; NUMA *na, *nahisto, *nax, *nay, *nap, *nasx, *nasy; NUMA *nadx, *nady, *nafx, *nafy, *na1, *na2, *na3, *na4; PIX *pixs, *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pixd; PIXA *pixa; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; lept_mkdir("lept/numa1"); /* -------------------------------------------------------------------* * Histograms * * -------------------------------------------------------------------*/ pi = 3.1415926535; na = numaCreate(5000); for (i = 0; i < 500000; i++) { angle = 0.02293 * i * pi; val = (l_float32)(999. * sin(angle)); numaAddNumber(na, val); } nahisto = numaMakeHistogramClipped(na, 6, 2000); nbins = numaGetCount(nahisto); nax = numaMakeSequence(0, 1, nbins); gplot = gplotCreate("/tmp/lept/numa1/histo1", GPLOT_PNG, "example histo 1", "i", "histo[i]"); gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nahisto); nahisto = numaMakeHistogram(na, 1000, &binsize, &binstart); nbins = numaGetCount(nahisto); nax = numaMakeSequence(binstart, binsize, nbins); fprintf(stderr, " binsize = %d, binstart = %d\n", binsize, binstart); gplot = gplotCreate("/tmp/lept/numa1/histo2", GPLOT_PNG, "example histo 2", "i", "histo[i]"); gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nahisto); nahisto = numaMakeHistogram(na, 1000, &binsize, NULL); nbins = numaGetCount(nahisto); nax = numaMakeSequence(0, binsize, nbins); fprintf(stderr, " binsize = %d, binstart = %d\n", binsize, 0); gplot = gplotCreate("/tmp/lept/numa1/histo3", GPLOT_PNG, "example histo 3", "i", "histo[i]"); gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine"); gplotMakeOutput(gplot); 
gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nahisto); nahisto = numaMakeHistogramAuto(na, 1000); nbins = numaGetCount(nahisto); numaGetParameters(nahisto, &startval, &fbinsize); nax = numaMakeSequence(startval, fbinsize, nbins); fprintf(stderr, " binsize = %7.4f, binstart = %8.3f\n", fbinsize, startval); gplot = gplotCreate("/tmp/lept/numa1/histo4", GPLOT_PNG, "example histo 4", "i", "histo[i]"); gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine"); gplotMakeOutput(gplot); gplotDestroy(&gplot); pix1 = pixRead("/tmp/lept/numa1/histo1.png"); pix2 = pixRead("/tmp/lept/numa1/histo2.png"); pix3 = pixRead("/tmp/lept/numa1/histo3.png"); pix4 = pixRead("/tmp/lept/numa1/histo4.png"); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 0 */ regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 1 */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 2 */ regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 3 */ pixa = pixaCreate(4); pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); pixaAddPix(pixa, pix3, L_INSERT); pixaAddPix(pixa, pix4, L_INSERT); if (rp->display) { pixd = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 20, 2); pixDisplayWithTitle(pixd, 0, 0, NULL, 1); pixDestroy(&pixd); } pixaDestroy(&pixa); numaDestroy(&nax); numaDestroy(&nahisto); numaGetStatsUsingHistogram(na, 2000, &minval, &maxval, &meanval, &variance, &median, 0.80, &rankval, &nahisto); rmsdev = sqrt((l_float64)variance); numaHistogramGetRankFromVal(nahisto, rankval, &rank); regTestCompareValues(rp, -999.00, minval, 0.1); /* 4 */ regTestCompareValues(rp, 999.00, maxval, 0.1); /* 5 */ regTestCompareValues(rp, 0.055, meanval, 0.001); /* 6 */ regTestCompareValues(rp, 0.30, median, 0.005); /* 7 */ regTestCompareValues(rp, 706.41, rmsdev, 0.1); /* 8 */ regTestCompareValues(rp, 808.15, rankval, 0.1); /* 9 */ regTestCompareValues(rp, 0.800, rank, 0.01); /* 10 */ if (rp->display) { fprintf(stderr, "Sin histogram: \n" " min val = %7.3f -- should be -999.00\n" " max val = %7.3f -- should be 999.00\n" " 
mean val = %7.3f -- should be 0.055\n" " median = %7.3f -- should be 0.30\n" " rmsdev = %7.3f -- should be 706.41\n" " rank val = %7.3f -- should be 808.152\n" " rank = %7.3f -- should be 0.800\n", minval, maxval, meanval, median, rmsdev, rankval, rank); } numaDestroy(&nahisto); numaDestroy(&na); /* -------------------------------------------------------------------* * Interpolation * * -------------------------------------------------------------------*/ /* Test numaInterpolateEqxInterval() */ pixs = pixRead("test8.jpg"); na = pixGetGrayHistogramMasked(pixs, NULL, 0, 0, 1); nasy = numaGetPartialSums(na); gplotSimple1(nasy, GPLOT_PNG, "/tmp/lept/numa1/int1", "partial sums"); gplotSimple1(na, GPLOT_PNG, "/tmp/lept/numa1/int2", "simple test"); numaInterpolateEqxInterval(0.0, 1.0, na, L_LINEAR_INTERP, 0.0, 255.0, 15, &nax, &nay); gplot = gplotCreate("/tmp/lept/numa1/int3", GPLOT_PNG, "test interpolation", "pix val", "num pix"); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "plot 1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&na); numaDestroy(&nasy); numaDestroy(&nax); numaDestroy(&nay); pixDestroy(&pixs); /* Test numaInterpolateArbxInterval() */ pixs = pixRead("test8.jpg"); na = pixGetGrayHistogramMasked(pixs, NULL, 0, 0, 1); nasy = numaGetPartialSums(na); numaInsertNumber(nasy, 0, 0.0); nasx = numaMakeSequence(0.0, 1.0, 257); numaInterpolateArbxInterval(nasx, nasy, L_LINEAR_INTERP, 10.0, 250.0, 23, &nax, &nay); gplot = gplotCreate("/tmp/lept/numa1/int4", GPLOT_PNG, "arbx interpolation", "pix val", "cum num pix"); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "plot 1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&na); numaDestroy(&nasx); numaDestroy(&nasy); numaDestroy(&nax); numaDestroy(&nay); pixDestroy(&pixs); /* Test numaInterpolateArbxVal() */ pixs = pixRead("test8.jpg"); na = pixGetGrayHistogramMasked(pixs, NULL, 0, 0, 1); nasy = numaGetPartialSums(na); numaInsertNumber(nasy, 0, 0.0); nasx = numaMakeSequence(0.0, 1.0, 257); nax = 
numaMakeSequence(15.0, (250.0 - 15.0) / 23.0, 24); n = numaGetCount(nax); nay = numaCreate(n); for (i = 0; i < n; i++) { numaGetFValue(nax, i, &xval); numaInterpolateArbxVal(nasx, nasy, L_QUADRATIC_INTERP, xval, &yval); numaAddNumber(nay, yval); } gplot = gplotCreate("/tmp/lept/numa1/int5", GPLOT_PNG, "arbx interpolation", "pix val", "cum num pix"); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "plot 1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&na); numaDestroy(&nasx); numaDestroy(&nasy); numaDestroy(&nax); numaDestroy(&nay); pixDestroy(&pixs); /* Test interpolation */ nasx = numaRead("testangle.na"); nasy = numaRead("testscore.na"); gplot = gplotCreate("/tmp/lept/numa1/int6", GPLOT_PNG, "arbx interpolation", "angle", "score"); numaInterpolateArbxInterval(nasx, nasy, L_LINEAR_INTERP, -2.00, 0.0, 50, &nax, &nay); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "linear"); numaDestroy(&nax); numaDestroy(&nay); numaInterpolateArbxInterval(nasx, nasy, L_QUADRATIC_INTERP, -2.00, 0.0, 50, &nax, &nay); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "quadratic"); numaDestroy(&nax); numaDestroy(&nay); gplotMakeOutput(gplot); gplotDestroy(&gplot); gplot = gplotCreate("/tmp/lept/numa1/int7", GPLOT_PNG, "arbx interpolation", "angle", "score"); numaInterpolateArbxInterval(nasx, nasy, L_LINEAR_INTERP, -1.2, -0.8, 50, &nax, &nay); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "quadratic"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaFitMax(nay, &yval, nax, &xval); if (rp->display) fprintf(stderr, "max = %f at loc = %f\n", yval, xval); pixa = pixaCreate(7); pix1 = pixRead("/tmp/lept/numa1/int1.png"); pix2 = pixRead("/tmp/lept/numa1/int2.png"); pix3 = pixRead("/tmp/lept/numa1/int3.png"); pix4 = pixRead("/tmp/lept/numa1/int4.png"); pix5 = pixRead("/tmp/lept/numa1/int5.png"); pix6 = pixRead("/tmp/lept/numa1/int6.png"); pix7 = pixRead("/tmp/lept/numa1/int7.png"); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 11 */ regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 12 */ 
regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 13 */ regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 14 */ regTestWritePixAndCheck(rp, pix5, IFF_PNG); /* 15 */ regTestWritePixAndCheck(rp, pix6, IFF_PNG); /* 16 */ regTestWritePixAndCheck(rp, pix7, IFF_PNG); /* 17 */ pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); pixaAddPix(pixa, pix3, L_INSERT); pixaAddPix(pixa, pix4, L_INSERT); pixaAddPix(pixa, pix5, L_INSERT); pixaAddPix(pixa, pix6, L_INSERT); pixaAddPix(pixa, pix7, L_INSERT); if (rp->display) { pixd = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 20, 2); pixDisplayWithTitle(pixd, 300, 0, NULL, 1); pixDestroy(&pixd); } pixaDestroy(&pixa); numaDestroy(&nasx); numaDestroy(&nasy); numaDestroy(&nax); numaDestroy(&nay); /* -------------------------------------------------------------------* * Integration and differentiation * * -------------------------------------------------------------------*/ /* Test integration and differentiation */ nasx = numaRead("testangle.na"); nasy = numaRead("testscore.na"); /* ---------- Plot the derivative ---------- */ numaDifferentiateInterval(nasx, nasy, -2.0, 0.0, 50, &nadx, &nady); gplot = gplotCreate("/tmp/lept/numa1/diff1", GPLOT_PNG, "derivative", "angle", "slope"); gplotAddPlot(gplot, nadx, nady, GPLOT_LINES, "derivative"); gplotMakeOutput(gplot); gplotDestroy(&gplot); /* ---------- Plot the original function ----------- */ /* and the integral of the derivative; the two */ /* should be approximately the same. 
*/ gplot = gplotCreate("/tmp/lept/numa1/diff2", GPLOT_PNG, "integ-diff", "angle", "val"); numaInterpolateArbxInterval(nasx, nasy, L_LINEAR_INTERP, -2.00, 0.0, 50, &nafx, &nafy); gplotAddPlot(gplot, nafx, nafy, GPLOT_LINES, "function"); n = numaGetCount(nadx); numaGetFValue(nafx, 0, &x0); numaGetFValue(nafy, 0, &y0); nay = numaCreate(n); /* (Note: this tests robustness of the integrator: we go from * i = 0, and choose to have only 1 point in the interpolation * there, which is too small and causes the function to bomb out.) */ fprintf(stderr, "We must get a 'npts < 2' error here:\n"); for (i = 0; i < n; i++) { numaGetFValue(nadx, i, &xval); numaIntegrateInterval(nadx, nady, x0, xval, 2 * i + 1, &yval); numaAddNumber(nay, y0 + yval); } gplotAddPlot(gplot, nafx, nay, GPLOT_LINES, "anti-derivative"); gplotMakeOutput(gplot); gplotDestroy(&gplot); pixa = pixaCreate(2); pix1 = pixRead("/tmp/lept/numa1/diff1.png"); pix2 = pixRead("/tmp/lept/numa1/diff2.png"); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 18 */ regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 19 */ pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); if (rp->display) { pixd = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 20, 2); pixDisplayWithTitle(pixd, 600, 0, NULL, 1); pixDestroy(&pixd); } pixaDestroy(&pixa); numaDestroy(&nasx); numaDestroy(&nasy); numaDestroy(&nafx); numaDestroy(&nafy); numaDestroy(&nadx); numaDestroy(&nady); numaDestroy(&nay); /* -------------------------------------------------------------------* * Rank extraction * * -------------------------------------------------------------------*/ /* Rank extraction with interpolation */ pixs = pixRead("test8.jpg"); nasy= pixGetGrayHistogramMasked(pixs, NULL, 0, 0, 1); numaMakeRankFromHistogram(0.0, 1.0, nasy, 350, &nax, &nay); gplot = gplotCreate("/tmp/lept/numa1/rank1", GPLOT_PNG, "test rank extractor", "pix val", "rank val"); gplotAddPlot(gplot, nax, nay, GPLOT_LINES, "plot 1"); gplotMakeOutput(gplot); gplotDestroy(&gplot); 
numaDestroy(&nasy); numaDestroy(&nax); numaDestroy(&nay); pixDestroy(&pixs); /* Rank extraction, point by point */ pixs = pixRead("test8.jpg"); nap = numaCreate(200); pixGetRankValueMasked(pixs, NULL, 0, 0, 2, 0.0, &val, &na); for (i = 0; i < 101; i++) { rank = 0.01 * i; numaHistogramGetValFromRank(na, rank, &val); numaAddNumber(nap, val); } gplotSimple1(nap, GPLOT_PNG, "/tmp/lept/numa1/rank2", "rank value"); pixa = pixaCreate(2); pix1 = pixRead("/tmp/lept/numa1/rank1.png"); pix2 = pixRead("/tmp/lept/numa1/rank2.png"); regTestWritePixAndCheck(rp, pix1, IFF_PNG); /* 20 */ regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 21 */ pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); if (rp->display) { pixd = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 20, 2); pixDisplayWithTitle(pixd, 900, 0, NULL, 1); pixDestroy(&pixd); } pixaDestroy(&pixa); numaDestroy(&na); numaDestroy(&nap); pixDestroy(&pixs); /* -------------------------------------------------------------------* * Numa-morphology * * -------------------------------------------------------------------*/ na = numaRead("lyra.5.na"); gplotSimple1(na, GPLOT_PNG, "/tmp/lept/numa1/lyra1", "Original"); na1 = numaErode(na, 21); gplotSimple1(na1, GPLOT_PNG, "/tmp/lept/numa1/lyra2", "Erosion"); na2 = numaDilate(na, 21); gplotSimple1(na2, GPLOT_PNG, "/tmp/lept/numa1/lyra3", "Dilation"); na3 = numaOpen(na, 21); gplotSimple1(na3, GPLOT_PNG, "/tmp/lept/numa1/lyra4", "Opening"); na4 = numaClose(na, 21); gplotSimple1(na4, GPLOT_PNG, "/tmp/lept/numa1/lyra5", "Closing"); pixa = pixaCreate(2); pix1 = pixRead("/tmp/lept/numa1/lyra1.png"); pix2 = pixRead("/tmp/lept/numa1/lyra2.png"); pix3 = pixRead("/tmp/lept/numa1/lyra3.png"); pix4 = pixRead("/tmp/lept/numa1/lyra4.png"); pix5 = pixRead("/tmp/lept/numa1/lyra5.png"); pixaAddPix(pixa, pix1, L_INSERT); pixaAddPix(pixa, pix2, L_INSERT); pixaAddPix(pixa, pix3, L_INSERT); pixaAddPix(pixa, pix4, L_INSERT); pixaAddPix(pixa, pix5, L_INSERT); regTestWritePixAndCheck(rp, pix1, 
IFF_PNG); /* 22 */ regTestWritePixAndCheck(rp, pix2, IFF_PNG); /* 23 */ regTestWritePixAndCheck(rp, pix3, IFF_PNG); /* 24 */ regTestWritePixAndCheck(rp, pix4, IFF_PNG); /* 25 */ regTestWritePixAndCheck(rp, pix5, IFF_PNG); /* 26 */ if (rp->display) { pixd = pixaDisplayTiledInRows(pixa, 32, 1500, 1.0, 0, 20, 2); pixDisplayWithTitle(pixd, 1200, 0, NULL, 1); pixDestroy(&pixd); } pixaDestroy(&pixa); numaDestroy(&na); numaDestroy(&na1); numaDestroy(&na2); numaDestroy(&na3); numaDestroy(&na4); pixaDestroy(&pixa); return regTestCleanup(rp); }

/*!
 * \brief   pixFindBaselines()
 *
 * \param[in]    pixs     1 bpp, 300 ppi
 * \param[out]   ppta     [optional] pairs of pts corresponding to
 *                        approx. ends of each text line
 * \param[in]    pixadb   for debug output; use NULL to skip
 * \return  na of baseline y values, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The text lines in the input binary image must already be
 *          horizontal.  Use pixDeskew() for a simple rotation, or
 *          pixDeskewLocal() first if a projective transform is needed.
 *      (2) Pass NULL for &pta if the end points are not wanted.  When
 *          returned, the pta holds the points in pairs: the left and
 *          right end of each baseline.
 *      (3) Caution: multiple columns whose lines are not aligned across
 *          columns will not be handled properly.  Extract each column
 *          separately before finding its baselines.
 *      (4) Two kinds of output are built: the raster line (y) value of
 *          each baseline, and optionally the end points of each one.
 *      (5) The method avoids fragile thresholds on peak heights so that
 *          both short and long text lines are found.  It combines the
 *          differential row-sum signal with a morphological analysis
 *          of where the text lines are.  The same data could be used to
 *          normalize peak heights, by weighting the differential signal
 *          near each baseline by the inverse of the text line width.
 * </pre>
 */
NUMA *
pixFindBaselines(PIX   *pixs,
                 PTA  **ppta,
                 PIXA  *pixadb)
{
l_int32    height, row, k, nboxes, upper, lower, ndiffs, bx, by, bw, bh;
l_int32    imaxloc, peakthresh, zerothresh, inpeak;
l_int32    mintosearch, peakmax, peakloc, nlocs, yloc;
l_int32   *diffs;
l_float32  maxval;
BOXA      *boxa1, *boxa2, *boxa3;
GPLOT     *gplot;
NUMA      *narows, *nadiff, *naloc, *naval;
PIX       *pix1, *pix2;
PTA       *pta;

    PROCNAME("pixFindBaselines");

    if (ppta) *ppta = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);

        /* Merge characters horizontally and strip small noise */
    pix1 = pixMorphSequence(pixs, "c25.1 + e15.1", 0);

        /* For debugging, keep a 4x reduced view of the merged image */
    if (pixadb)
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);

        /* Form the signal (sum of row i) - (sum of row i + 1).  Its
         * tall positive peaks mark the baselines. */
    nasumfail:
    narows = pixCountPixelsByRow(pix1, NULL);
    if (!narows) {
        pixDestroy(&pix1);
        return (NUMA *)ERROR_PTR("nasum not made", procName, NULL);
    }
    height = pixGetHeight(pixs);
    nadiff = numaCreate(height);
    numaGetIValue(narows, 0, &lower);
    for (row = 1; row < height; row++) {
        upper = lower;
        numaGetIValue(narows, row, &lower);
        numaAddNumber(nadiff, upper - lower);
    }
    numaDestroy(&narows);

    if (pixadb) {  /* plot the differential signal */
        lept_mkdir("lept/baseline");
        gplotSimple1(nadiff, GPLOT_PNG, "/tmp/lept/baseline/diff", "Diff Sig");
        pix2 = pixRead("/tmp/lept/baseline/diff.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }

        /* Pull out what is needed from the signal, then release it */
    diffs = numaGetIArray(nadiff);
    ndiffs = numaGetCount(nadiff);
    numaGetMax(nadiff, &maxval, &imaxloc);
    numaDestroy(&nadiff);

        /* A value above peakthresh opens a peak; once in a peak, a value
         * at or below zerothresh (far enough along) closes it. */
    peakthresh = (l_int32)maxval / PEAK_THRESHOLD_RATIO;
    zerothresh = (l_int32)maxval / ZERO_THRESHOLD_RATIO;

    naloc = numaCreate(0);
    naval = numaCreate(0);
    inpeak = FALSE;
    for (row = 0; row < ndiffs; row++) {
        if (!inpeak) {
            if (diffs[row] > peakthresh) {  /* entering a peak */
                inpeak = TRUE;
                    /* no closing allowed before mintosearch */
                mintosearch = row + MIN_DIST_IN_PEAK;
                peakmax = diffs[row];
                peakloc = row;
            }
            continue;
        }

            /* Inside a peak: track the maximum, or close the peak */
        if (diffs[row] > peakmax) {
            peakmax = diffs[row];
            peakloc = row;
            mintosearch = row + MIN_DIST_IN_PEAK;
        } else if (row > mintosearch && diffs[row] <= zerothresh) {
            inpeak = FALSE;
            numaAddNumber(naval, peakmax);
            numaAddNumber(naloc, peakloc);
        }
    }
    LEPT_FREE(diffs);

        /* A peak still open at the end (e.g., no descenders, baseline
         * at the bottom) is recorded as well. */
    if (inpeak) {
        numaAddNumber(naval, peakmax);
        numaAddNumber(naloc, peakloc);
    }

    if (pixadb) {  /* plot peak height against raster location */
        gplot = gplotCreate("/tmp/lept/baseline/loc", GPLOT_PNG, "Peak locs",
                            "rasterline", "height");
        gplotAddPlot(gplot, naloc, naval, GPLOT_POINTS, "locs");
        gplotMakeOutput(gplot);
        gplotDestroy(&gplot);
        pix2 = pixRead("/tmp/lept/baseline/loc.png");
        pixaAddPix(pixadb, pix2, L_INSERT);
    }
    numaDestroy(&naval);

        /* Build an approximate picture of the text line extents.
         * The boxes are filtered because one text line may give
         * rise to more than one box. */
    pix2 = pixMorphSequence(pix1, "r11 + c20.1 + o30.1 +c1.3", 0);
    if (pixadb)
        pixaAddPix(pixadb, pix2, L_COPY);
    boxa1 = pixConnComp(pix2, NULL, 4);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    if (boxaGetCount(boxa1) == 0) {
        numaDestroy(&naloc);
        boxaDestroy(&boxa1);
        L_INFO("no compnents after filtering\n", procName);
        return NULL;
    }
    boxa2 = boxaTransform(boxa1, 0, 0, 4., 4.);
    boxa3 = boxaSort(boxa2, L_SORT_BY_Y, L_SORT_INCREASING, NULL);
    boxaDestroy(&boxa1);
    boxaDestroy(&boxa2);

        /* If requested, pair each box with the first baseline lying
         * within 25 pixels of the box bottom, and save the segment. */
    pta = NULL;
    if (ppta) {
        pta = ptaCreate(0);
        *ppta = pta;
    }
    if (pta) {
        nlocs = numaGetCount(naloc);
        nboxes = boxaGetCount(boxa3);
        for (row = 0; row < nboxes; row++) {
            boxaGetBoxGeometry(boxa3, row, &bx, &by, &bw, &bh);
            for (k = 0; k < nlocs; k++) {
                numaGetIValue(naloc, k, &yloc);
                if (L_ABS(yloc - (by + bh)) <= 25) {
                    ptaAddPt(pta, bx, yloc);
                    ptaAddPt(pta, bx + bw, yloc);
                    break;
                }
            }
        }
    }
    boxaDestroy(&boxa3);

    if (pixadb && pta) {  /* render the baseline segments in red */
        l_int32  npts, x1, y1, x2, y2;

        pix1 = pixConvertTo32(pixs);
        npts = ptaGetCount(pta);
        for (row = 0; row < npts; row += 2) {
            ptaGetIPt(pta, row, &x1, &y1);
            ptaGetIPt(pta, row + 1, &x2, &y2);
            pixRenderLineArb(pix1, x1, y1, x2, y2, 2, 255, 0, 0);
        }
        pixWrite("/tmp/lept/baseline/baselines.png", pix1, IFF_PNG);
        pixaAddPix(pixadb, pixScale(pix1, 0.25, 0.25), L_INSERT);
        pixDestroy(&pix1);
    }

    return naloc;
}

int main(int argc, char **argv) { char *fname, *filename; const char *str; char buffer[512]; l_int32 i, npages; size_t length; FILE *fp; NUMA *naflags, *nasizes; PIX *pix, *pix1, *pix2, *pixd; PIXA *pixa; PIXCMAP *cmap; SARRAY *savals, *satypes, *sa; static char mainName[] = "mtifftest"; if (argc != 1) return ERROR_INT(" Syntax: mtifftest", mainName, 1); lept_mkdir("tiff"); #if 1 /* ------------------ Test multipage I/O -------------------*/ /* This puts every image file in the directory with a string * match to "weasel" into a multipage tiff file. * Images with 1 bpp are coded as g4; the others as zip. * It then reads back into a pix and displays. */ writeMultipageTiff(".", "weasel8.", "/tmp/tiff/weasel8.tif"); pixa = pixaReadMultipageTiff("/tmp/tiff/weasel8.tif"); pixd = pixaDisplayTiledInRows(pixa, 1, 1200, 0.5, 0, 15, 4); pixDisplay(pixd, 100, 0); pixDestroy(&pixd); pixd = pixaDisplayTiledInRows(pixa, 8, 1200, 0.8, 0, 15, 4); pixDisplay(pixd, 100, 200); pixDestroy(&pixd); pixd = pixaDisplayTiledInRows(pixa, 32, 1200, 1.2, 0, 15, 4); pixDisplay(pixd, 100, 400); pixDestroy(&pixd); pixaDestroy(&pixa); #endif #if 1 /* ------------ Test single-to-multipage I/O -------------------*/ /* Read the files and generate a multipage tiff file of G4 images. * Then convert that to a G4 compressed and ascii85 encoded PS file. 
*/ sa = getSortedPathnamesInDirectory(".", "weasel4.", 0, 4); sarrayWriteStream(stderr, sa); sarraySort(sa, sa, L_SORT_INCREASING); sarrayWriteStream(stderr, sa); npages = sarrayGetCount(sa); for (i = 0; i < npages; i++) { fname = sarrayGetString(sa, i, 0); filename = genPathname(".", fname); pix1 = pixRead(filename); if (!pix1) continue; pix2 = pixConvertTo1(pix1, 128); if (i == 0) pixWriteTiff("/tmp/tiff/weasel4", pix2, IFF_TIFF_G4, "w+"); else pixWriteTiff("/tmp/tiff/weasel4", pix2, IFF_TIFF_G4, "a"); pixDestroy(&pix1); pixDestroy(&pix2); lept_free(filename); } /* Write it out as a PS file */ convertTiffMultipageToPS("/tmp/tiff/weasel4", "/tmp/tiff/weasel4.ps", NULL, 0.95); sarrayDestroy(&sa); #endif #if 1 /* ------------------ Test multipage I/O -------------------*/ /* Read count of pages in tiff multipage file */ writeMultipageTiff(".", "weasel2", weasel_orig); fp = lept_fopen(weasel_orig, "rb"); if (fileFormatIsTiff(fp)) { tiffGetCount(fp, &npages); fprintf(stderr, " Tiff: %d page\n", npages); } else return ERROR_INT(" file not tiff", mainName, 1); lept_fclose(fp); /* Split into separate page files */ for (i = 0; i < npages + 1; i++) { /* read one beyond to catch error */ if (i == npages) L_INFO("Errors in next 2 lines are intentional!\n", mainName); pix = pixReadTiff(weasel_orig, i); if (!pix) continue; sprintf(buffer, "/tmp/tiff/%03d.tif", i); pixWrite(buffer, pix, IFF_TIFF_ZIP); pixDestroy(&pix); } /* Read separate page files and write reversed file */ for (i = npages - 1; i >= 0; i--) { sprintf(buffer, "/tmp/tiff/%03d.tif", i); pix = pixRead(buffer); if (!pix) continue; if (i == npages - 1) pixWriteTiff(weasel_rev, pix, IFF_TIFF_ZIP, "w+"); else pixWriteTiff(weasel_rev, pix, IFF_TIFF_ZIP, "a"); pixDestroy(&pix); } /* Read reversed file and reverse again */ pixa = pixaCreate(npages); for (i = 0; i < npages; i++) { pix = pixReadTiff(weasel_rev, i); pixaAddPix(pixa, pix, L_INSERT); } for (i = npages - 1; i >= 0; i--) { pix = pixaGetPix(pixa, i, L_CLONE); if 
(i == npages - 1) pixWriteTiff(weasel_rev_rev, pix, IFF_TIFF_ZIP, "w+"); else pixWriteTiff(weasel_rev_rev, pix, IFF_TIFF_ZIP, "a"); pixDestroy(&pix); } pixaDestroy(&pixa); #endif #if 0 /* ----- test adding custom public tags to a tiff header ----- */ pix = pixRead("feyn.tif"); naflags = numaCreate(10); savals = sarrayCreate(10); satypes = sarrayCreate(10); nasizes = numaCreate(10); /* numaAddNumber(naflags, TIFFTAG_XMLPACKET); */ /* XMP: 700 */ numaAddNumber(naflags, 700); str = "<xmp>This is a Fake XMP packet</xmp>\n<text>Guess what ...?</text>"; length = strlen(str); sarrayAddString(savals, (char *)str, L_COPY); sarrayAddString(satypes, (char *)"char*", L_COPY); numaAddNumber(nasizes, length); /* get it all */ numaAddNumber(naflags, 269); /* DOCUMENTNAME */ sarrayAddString(savals, (char *)"One silly title", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); numaAddNumber(naflags, 270); /* IMAGEDESCRIPTION */ sarrayAddString(savals, (char *)"One page of text", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); /* the max sample is used by rendering programs * to scale the dynamic range */ numaAddNumber(naflags, 281); /* MAXSAMPLEVALUE */ sarrayAddString(savals, (char *)"4", L_COPY); sarrayAddString(satypes, (char *)"l_uint16", L_COPY); /* note that date is required to be a 20 byte string */ numaAddNumber(naflags, 306); /* DATETIME */ sarrayAddString(savals, (char *)"2004:10:11 09:35:15", L_COPY); sarrayAddString(satypes, (char *)"const char*", L_COPY); /* note that page number requires 2 l_uint16 input */ numaAddNumber(naflags, 297); /* PAGENUMBER */ sarrayAddString(savals, (char *)"1-412", L_COPY); sarrayAddString(satypes, (char *)"l_uint16-l_uint16", L_COPY); pixWriteTiffCustom("/tmp/tiff/tags.tif", pix, IFF_TIFF_G4, "w", naflags, savals, satypes, nasizes); fprintTiffInfo(stderr, (char *)"/tmp/tiff/tags.tif"); fprintf(stderr, "num flags = %d\n", numaGetCount(naflags)); fprintf(stderr, "num sizes = %d\n", numaGetCount(nasizes)); 
fprintf(stderr, "num vals = %d\n", sarrayGetCount(savals)); fprintf(stderr, "num types = %d\n", sarrayGetCount(satypes)); numaDestroy(&naflags); numaDestroy(&nasizes); sarrayDestroy(&savals); sarrayDestroy(&satypes); pixDestroy(&pix); #endif return 0; }

/*!
 *  pixGetRunsOnLine()
 *
 *      Input:  pixs (1 bpp)
 *              x1, y1, x2, y2
 *      Return: numa, or null on error
 *
 *  Notes:
 *      (1) Action: this function uses the bresenham algorithm to compute
 *          the pixels along the specified line.  It returns a Numa of the
 *          runlengths of the fg (black) and bg (white) runs, always
 *          starting with a white run.
 *      (2) If the first pixel on the line is black, the length of the
 *          first returned run (which is white) is 0.
 *      (3) Both end points must lie inside pixs; otherwise an error
 *          is returned.
 *      (4) The pta of line points is a temporary; it is destroyed on
 *          every return path after it has been made.
 */
NUMA *
pixGetRunsOnLine(PIX     *pixs,
                 l_int32  x1,
                 l_int32  y1,
                 l_int32  x2,
                 l_int32  y2)
{
l_int32   w, h, x, y, npts;
l_int32   i, runlen;
l_uint32  val, preval;
NUMA     *numa;
PTA      *pta;

    PROCNAME("pixGetRunsOnLine");

    if (!pixs)
        return (NUMA *)ERROR_PTR("pixs not defined", procName, NULL);
    if (pixGetDepth(pixs) != 1)
        return (NUMA *)ERROR_PTR("pixs not 1 bpp", procName, NULL);

    w = pixGetWidth(pixs);
    h = pixGetHeight(pixs);
    if (x1 < 0 || x1 >= w)
        return (NUMA *)ERROR_PTR("x1 not valid", procName, NULL);
    if (x2 < 0 || x2 >= w)
        return (NUMA *)ERROR_PTR("x2 not valid", procName, NULL);
    if (y1 < 0 || y1 >= h)
        return (NUMA *)ERROR_PTR("y1 not valid", procName, NULL);
    if (y2 < 0 || y2 >= h)
        return (NUMA *)ERROR_PTR("y2 not valid", procName, NULL);

    if ((pta = generatePtaLine(x1, y1, x2, y2)) == NULL)
        return (NUMA *)ERROR_PTR("pta not made", procName, NULL);
    if ((npts = ptaGetCount(pta)) == 0) {
        ptaDestroy(&pta);
        return (NUMA *)ERROR_PTR("pta has no pts", procName, NULL);
    }
    if ((numa = numaCreate(0)) == NULL) {
        ptaDestroy(&pta);
        return (NUMA *)ERROR_PTR("numa not made", procName, NULL);
    }

    preval = 0;
    runlen = 0;
    for (i = 0; i < npts; i++) {
        ptaGetIPt(pta, i, &x, &y);
        pixGetPixel(pixs, x, y, &val);
        if (i == 0) {
            if (val == 1)  /* black pixel; append white run of size 0 */
                numaAddNumber(numa, 0);
            preval = val;
            runlen = 1;
        } else if (val == preval) {  /* extend current run */
            runlen++;
        } else {  /* end previous run and start a new one */
            numaAddNumber(numa, runlen);
            preval = val;
            runlen = 1;
        }
    }
    numaAddNumber(numa, runlen);  /* append last run */

    ptaDestroy(&pta);
    return numa;
}

/*!
 * \brief   numaaCompareImagesByBoxes()
 *
 * \param[in]    naa1       for image 1, formatted by boxaExtractSortedPattern()
 * \param[in]    naa2       ditto; for image 2
 * \param[in]    nperline   number of box regions to be used in each textline
 * \param[in]    nreq       number of complete row matches required
 * \param[in]    maxshiftx  max allowed x shift between two patterns, in pixels
 * \param[in]    maxshifty  max allowed y shift between two patterns, in pixels
 * \param[in]    delx       max allowed difference in x data, after alignment
 * \param[in]    dely       max allowed difference in y data, after alignment
 * \param[out]   psame      1 if %nreq row matches are found; 0 otherwise
 * \param[in]    debugflag  1 for debug output
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Each input numaa describes a set of sorted bounding boxes
 *          (sorted by textline and, within each textline, from
 *          left to right) in the images from which they are derived.
 *          See boxaExtractSortedPattern() for a description of the data
 *          format in each of the input numaa.
 *      (2) This function does an alignment between the input
 *          descriptions of bounding boxes for two images. The
 *          input parameter %nperline specifies the number of boxes
 *          to consider in each line when testing for a match, and
 *          %nreq is the required number of lines that must be well-aligned
 *          to get a match.
 *      (3) Testing by alignment has 3 steps:
 *          (a) Generating the location of word bounding boxes from the
 *              images (prior to calling this function).
 *          (b) Listing all possible pairs of aligned rows, based on
 *              tolerances in horizontal and vertical positions of
 *              the boxes.  Specifically, all pairs of rows are enumerated
 *              whose first %nperline boxes can be brought into close
 *              alignment, based on the delx parameter for boxes in the
 *              line and within the overall %maxshiftx and %maxshifty
 *              constraints.
 *          (c) Each pair, starting with the first, is used to search
 *              for a set of %nreq - 1 other pairs that can all be aligned
 *              with a difference in global translation of not more
 *              than (%delx, %dely).
 *      (4) If either numaa has fewer than %nreq lines, this returns 0
 *          with *psame = 0 without doing any comparison.
 *      (5) If any working array or numa cannot be allocated, all
 *          temporaries are released and 1 is returned with *psame = 0.
 * </pre>
 */
l_int32
numaaCompareImagesByBoxes(NUMAA    *naa1,
                          NUMAA    *naa2,
                          l_int32   nperline,
                          l_int32   nreq,
                          l_int32   maxshiftx,
                          l_int32   maxshifty,
                          l_int32   delx,
                          l_int32   dely,
                          l_int32  *psame,
                          l_int32   debugflag)
{
l_int32   n1, n2, i, j, nbox, y1, y2, xl1, xl2;
l_int32   shiftx, shifty, match, ret;
l_int32  *line1, *line2;  /* indicator for sufficient boxes in a line */
l_int32  *yloc1, *yloc2;  /* arrays of y value for first box in a line */
l_int32  *xleft1, *xleft2;  /* arrays of x value for left side of first box */
NUMA     *na1, *na2, *nai1, *nai2, *nasx, *nasy;

    PROCNAME("numaaCompareImagesByBoxes");

    if (!psame)
        return ERROR_INT("&same not defined", procName, 1);
    *psame = 0;
    if (!naa1)
        return ERROR_INT("naa1 not defined", procName, 1);
    if (!naa2)
        return ERROR_INT("naa2 not defined", procName, 1);
    if (nperline < 1)
        return ERROR_INT("nperline < 1", procName, 1);
    if (nreq < 1)
        return ERROR_INT("nreq < 1", procName, 1);

    n1 = numaaGetCount(naa1);
    n2 = numaaGetCount(naa2);
    if (n1 < nreq || n2 < nreq)
        return 0;

        /* Null everything that the cleanup section releases, so that
         * it can be reached from any point below. */
    nai1 = nai2 = nasx = nasy = NULL;
    ret = 0;

        /* Find the lines in naa1 and naa2 with sufficient boxes.
         * Also, find the y-values for each of the lines, and the
         * LH x-values of the first box in each line.  Here n1 and n2
         * are both >= nreq >= 1, so no zero-sized request is made. */
    line1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32));
    line2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32));
    yloc1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32));
    yloc2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32));
    xleft1 = (l_int32 *)LEPT_CALLOC(n1, sizeof(l_int32));
    xleft2 = (l_int32 *)LEPT_CALLOC(n2, sizeof(l_int32));
    if (!line1 || !line2 || !yloc1 || !yloc2 || !xleft1 || !xleft2) {
        ret = ERROR_INT("array allocation failed", procName, 1);
        goto cleanup;
    }

    for (i = 0; i < n1; i++) {
        na1 = numaaGetNuma(naa1, i, L_CLONE);
        numaGetIValue(na1, 0, yloc1 + i);
        numaGetIValue(na1, 1, xleft1 + i);
        nbox = (numaGetCount(na1) - 1) / 2;
        if (nbox >= nperline)
            line1[i] = 1;
        numaDestroy(&na1);
    }
    for (i = 0; i < n2; i++) {
        na2 = numaaGetNuma(naa2, i, L_CLONE);
        numaGetIValue(na2, 0, yloc2 + i);
        numaGetIValue(na2, 1, xleft2 + i);
        nbox = (numaGetCount(na2) - 1) / 2;
        if (nbox >= nperline)
            line2[i] = 1;
        numaDestroy(&na2);
    }

        /* Enumerate all possible line matches.  A 'possible' line
         * match is one where the x and y shifts for the first box
         * in each line are within the maxshiftx and maxshifty
         * constraints, and the left and right sides of the remaining
         * (nperline - 1) successive boxes are within delx of each other.
         * The result is a set of four numas giving parameters of
         * each set of matching lines. */
    nai1 = numaCreate(0);  /* line index 1 of match */
    nai2 = numaCreate(0);  /* line index 2 of match */
    nasx = numaCreate(0);  /* shiftx for match */
    nasy = numaCreate(0);  /* shifty for match */
    if (!nai1 || !nai2 || !nasx || !nasy) {
        ret = ERROR_INT("match numas not made", procName, 1);
        goto cleanup;
    }

    for (i = 0; i < n1; i++) {
        if (line1[i] == 0) continue;
        y1 = yloc1[i];
        xl1 = xleft1[i];
        na1 = numaaGetNuma(naa1, i, L_CLONE);
        for (j = 0; j < n2; j++) {
            if (line2[j] == 0) continue;
            y2 = yloc2[j];
            if (L_ABS(y1 - y2) > maxshifty) continue;
            xl2 = xleft2[j];
            if (L_ABS(xl1 - xl2) > maxshiftx) continue;
            shiftx = xl1 - xl2;  /* shift to add to x2 values */
            shifty = y1 - y2;  /* shift to add to y2 values */
            na2 = numaaGetNuma(naa2, j, L_CLONE);

                /* Now check if 'nperline' boxes in the two lines match */
            match = testLineAlignmentX(na1, na2, shiftx, delx, nperline);
            if (match) {
                numaAddNumber(nai1, i);
                numaAddNumber(nai2, j);
                numaAddNumber(nasx, shiftx);
                numaAddNumber(nasy, shifty);
            }
            numaDestroy(&na2);
        }
        numaDestroy(&na1);
    }

        /* Determine if there are a sufficient number of mutually
         * aligned matches.  Mutually aligned matches place an additional
         * constraint on the 'possible' matches, where the relative
         * shifts must not exceed the (delx, dely) distances. */
    countAlignedMatches(nai1, nai2, nasx, nasy, n1, n2, delx, dely,
                        nreq, psame, debugflag);

cleanup:
    LEPT_FREE(line1);
    LEPT_FREE(line2);
    LEPT_FREE(yloc1);
    LEPT_FREE(yloc2);
    LEPT_FREE(xleft1);
    LEPT_FREE(xleft2);
    numaDestroy(&nai1);
    numaDestroy(&nai2);
    numaDestroy(&nasx);
    numaDestroy(&nasy);
    return ret;
}

/*!
 *  pixSplitComponentWithProfile()
 *
 *      Input:  pixs (1 bpp, exactly one connected component)
 *              delta (distance used in extrema finding in a numa; typ. 10)
 *              mindel (minimum required difference between profile minimum
 *                      and profile values +2 and -2 away; typ. 7)
 *              &pixdebug (<optional return> debug image of splitting)
 *      Return: boxa (of c.c. after splitting), or null on error
 *
 *  Notes:
 *      (1) This will split the most obvious cases of touching characters.
 *          The split points it is searching for are narrow and deep
 *          minima in the vertical pixel projection profile, after a
 *          large vertical closing has been applied to the component.
 *      (2) If fewer than 3 extrema are found, or no minimum passes the
 *          mindel test, the returned boxa holds a single box covering
 *          all of pixs.
 *      (3) A candidate minimum closer than 2 columns to the left edge
 *          is ignored, because the profile value 2 columns to its left
 *          does not exist.
 *      (4) When &pixdebug is defined, the extrema and each candidate are
 *          written to stderr, and a 32 bpp rendering of the resulting
 *          boxes is returned through it.
 */
BOXA *
pixSplitComponentWithProfile(PIX     *pixs,
                             l_int32  delta,
                             l_int32  mindel,
                             PIX    **ppixdebug)
{
l_int32   w, h, n2, i, firstmin, xmin, xshift;
l_int32   nmin, nleft, nright, nsplit, isplit, ncomp;
l_int32  *array1, *array2;
BOX      *box;
BOXA     *boxad;
NUMA     *na1, *na2, *nasplit;
PIX      *pix1, *pixdb;

    PROCNAME("pixSplitComponentWithProfile");

    if (ppixdebug) *ppixdebug = NULL;
    if (!pixs || pixGetDepth(pixs) != 1)
        return (BOXA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL);
    pixGetDimensions(pixs, &w, &h, NULL);

        /* Closing to consolidate characters vertically */
    pix1 = pixCloseSafeBrick(NULL, pixs, 1, 100);

        /* Get extrema of column projections */
    boxad = boxaCreate(2);
    na1 = pixCountPixelsByColumn(pix1);  /* w elements */
    pixDestroy(&pix1);
    na2 = numaFindExtrema(na1, delta);
    n2 = numaGetCount(na2);
    if (n2 < 3) {  /* no split possible */
        box = boxCreate(0, 0, w, h);
        boxaAddBox(boxad, box, L_INSERT);
        numaDestroy(&na1);
        numaDestroy(&na2);
        return boxad;
    }

        /* Look for sufficiently deep and narrow minima.
         * All minima of interest must be surrounded by max on each
         * side.  firstmin is the index of first possible minimum. */
    array1 = numaGetIArray(na1);
    array2 = numaGetIArray(na2);
    if (!array1 || !array2) {
        if (array1) FREE(array1);
        if (array2) FREE(array2);
        numaDestroy(&na1);
        numaDestroy(&na2);
        boxaDestroy(&boxad);
        return (BOXA *)ERROR_PTR("arrays not made", procName, NULL);
    }
    if (ppixdebug) numaWriteStream(stderr, na2);
    firstmin = (array1[array2[0]] > array1[array2[1]]) ? 1 : 2;
    nasplit = numaCreate(n2);  /* will hold split locations */
    for (i = firstmin; i < n2 - 1; i += 2) {
        xmin = array2[i];
        nmin = array1[xmin];
        if (xmin + 2 >= w) break;  /* no more splits possible */
        if (xmin < 2) continue;  /* array1[xmin - 2] would be out of bounds */
        nleft = array1[xmin - 2];
        nright = array1[xmin + 2];
        if (ppixdebug) {
            fprintf(stderr,
                "Splitting: xmin = %d, w = %d; nl = %d, nmin = %d, nr = %d\n",
                xmin, w, nleft, nmin, nright);
        }
        if (nleft - nmin >= mindel && nright - nmin >= mindel)  /* split */
            numaAddNumber(nasplit, xmin);
    }
    nsplit = numaGetCount(nasplit);

#if 0
    if (ppixdebug && nsplit > 0)
        gplotSimple1(na1, GPLOT_X11, "/tmp/splitroot", NULL);
#endif

    numaDestroy(&na1);
    numaDestroy(&na2);
    FREE(array1);
    FREE(array2);

    if (nsplit == 0) {  /* no splitting */
        numaDestroy(&nasplit);  /* was leaked on this path */
        box = boxCreate(0, 0, w, h);
        boxaAddBox(boxad, box, L_INSERT);
        return boxad;
    }

        /* Use split points to generate b.b. after splitting.  The
         * column at each split point is dropped from both neighbors. */
    for (i = 0, xshift = 0; i < nsplit; i++) {
        numaGetIValue(nasplit, i, &isplit);
        box = boxCreate(xshift, 0, isplit - xshift, h);
        boxaAddBox(boxad, box, L_INSERT);
        xshift = isplit + 1;
    }
    box = boxCreate(xshift, 0, w - xshift, h);
    boxaAddBox(boxad, box, L_INSERT);

    numaDestroy(&nasplit);

    if (ppixdebug) {
        pixdb = pixConvertTo32(pixs);
        ncomp = boxaGetCount(boxad);
        for (i = 0; i < ncomp; i++) {
            box = boxaGetBox(boxad, i, L_CLONE);
            pixRenderBoxBlend(pixdb, box, 1, 255, 0, 0, 0.5);
            boxDestroy(&box);
        }
        *ppixdebug = pixdb;
    }

    return boxad;
}

/*! * addColorizedGrayToCmap() * * Input: cmap (from 2 or 4 bpp pix) * type (L_PAINT_LIGHT, L_PAINT_DARK) * rval, gval, bval (target color) * &na (<optional return> table for mapping new cmap entries) * Return: 0 if OK; 1 on error; 2 if new colors will not fit in cmap. * * Notes: * (1) If type == L_PAINT_LIGHT, it colorizes non-black pixels, * preserving antialiasing. * If type == L_PAINT_DARK, it colorizes non-white pixels, * preserving antialiasing. * (2) This increases the colormap size by the number of * different gray (non-black or non-white) colors in the * input colormap. If there is not enough room in the colormap * for this expansion, it returns 1 (treated as a warning); * the caller should check the return value. * (3) This can be used to determine if the new colors will fit in * the cmap, using null for &na. Returns 0 if they fit; 2 if * they don't fit. * (4) The mapping table contains, for each gray color found, the * index of the corresponding colorized pixel. Non-gray * pixels are assigned the invalid index 256. * (5) See pixColorGrayCmap() for usage. 
*/ l_int32 addColorizedGrayToCmap(PIXCMAP *cmap, l_int32 type, l_int32 rval, l_int32 gval, l_int32 bval, NUMA **pna) { l_int32 i, n, erval, egval, ebval, nrval, ngval, nbval, newindex; NUMA *na; PROCNAME("addColorizedGrayToCmap"); if (pna) *pna = NULL; if (!cmap) return ERROR_INT("cmap not defined", procName, 1); if (type != L_PAINT_DARK && type != L_PAINT_LIGHT) return ERROR_INT("invalid type", procName, 1); n = pixcmapGetCount(cmap); na = numaCreate(n); for (i = 0; i < n; i++) { pixcmapGetColor(cmap, i, &erval, &egval, &ebval); if (type == L_PAINT_LIGHT) { if (erval == egval && erval == ebval && erval != 0) { nrval = (l_int32)(rval * (l_float32)erval / 255.); ngval = (l_int32)(gval * (l_float32)egval / 255.); nbval = (l_int32)(bval * (l_float32)ebval / 255.); if (pixcmapAddNewColor(cmap, nrval, ngval, nbval, &newindex)) { numaDestroy(&na); L_WARNING("no room; colormap full\n", procName); return 2; } numaAddNumber(na, newindex); } else { numaAddNumber(na, 256); /* invalid number; not gray */ } } else { /* L_PAINT_DARK */ if (erval == egval && erval == ebval && erval != 255) { nrval = rval + (l_int32)((255. - rval) * (l_float32)erval / 255.); ngval = gval + (l_int32)((255. - gval) * (l_float32)egval / 255.); nbval = bval + (l_int32)((255. - bval) * (l_float32)ebval / 255.); if (pixcmapAddNewColor(cmap, nrval, ngval, nbval, &newindex)) { numaDestroy(&na); L_WARNING("no room; colormap full\n", procName); return 2; } numaAddNumber(na, newindex); } else { numaAddNumber(na, 256); /* invalid number; not gray */ } } } if (pna) *pna = na; else numaDestroy(&na); return 0; }

/*!
 *  dewarpBuildModel()
 *
 *      Input:  dew
 *              debugflag (1 for debugging output)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This is the basic function that builds the vertical
 *          disparity array, which allows determination of the
 *          src pixel in the input image corresponding to each
 *          dest pixel in the dewarped image.
 *      (2) The method is as follows:
 *          * Estimate the centers of all the long textlines and
 *            fit a LS quadratic to each one.  This smooths the curves.
 *          * Sample each curve at a regular interval, find the y-value
 *            of the flat point on each curve, and subtract the sampled
 *            curve value from this value.  This is the vertical
 *            disparity.
 *          * Fit a LS quadratic to each set of vertically aligned
 *            disparity samples.  This smooths the disparity values
 *            in the vertical direction.  Then resample at the same
 *            regular interval.  We now have a regular grid of smoothed
 *            vertical disparity values.
 *          * Interpolate this grid to get a full resolution disparity
 *            map.  This can be applied directly to the src image
 *            pixels to dewarp the image in the vertical direction,
 *            making all textlines horizontal.
 *      (3) On success, the following fields of dew are set: naflats,
 *          nacurves, sampvdispar, fullvdispar, success; and, if
 *          dew->applyhoriz is set, fullhdispar, samphdispar and extraw.
 *      (4) With debugflag set, intermediate images and data are
 *          written as temp files under /tmp and some are displayed.
 */
l_int32
dewarpBuildModel(L_DEWARP  *dew,
                 l_int32    debugflag)
{
char       *tempname;
l_int32     i, j, nlines, nx, ny, sampling;
l_float32   c0, c1, c2, x, y, flaty, val;
l_float32  *faflats;
NUMA       *nax, *nafit, *nacurve, *nacurves, *naflat, *naflats, *naflatsi;
PIX        *pixs, *pixt1, *pixt2;
PTA        *pta, *ptad;
PTAA       *ptaa1, *ptaa2, *ptaa3, *ptaa4, *ptaa5, *ptaa6, *ptaa7;
FPIX       *fpix1, *fpix2, *fpix3;

    PROCNAME("dewarpBuildModel");

    if (!dew)
        return ERROR_INT("dew not defined", procName, 1);

    pixs = dew->pixs;
    if (debugflag) {
        pixDisplayWithTitle(pixs, 0, 0, "pixs", 1);
        pixWriteTempfile("/tmp", "pixs.png", pixs, IFF_PNG, NULL);
    }

        /* Make initial estimate of centers of textlines */
    ptaa1 = pixGetTextlineCenters(pixs, DEBUG_TEXTLINE_CENTERS);
    if (debugflag) {
        pixt1 = pixConvertTo32(pixs);
        pixt2 = pixDisplayPtaa(pixt1, ptaa1);
        pixWriteTempfile("/tmp", "lines1.png", pixt2, IFF_PNG, NULL);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }

        /* Remove all lines that are not near the length
         * of the longest line. */
    ptaa2 = ptaaRemoveShortLines(pixs, ptaa1, 0.8, DEBUG_SHORT_LINES);
    if (debugflag) {
        pixt1 = pixConvertTo32(pixs);
        pixt2 = pixDisplayPtaa(pixt1, ptaa2);
        pixWriteTempfile("/tmp", "lines2.png", pixt2, IFF_PNG, NULL);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
    }
    nlines = ptaaGetCount(ptaa2);
    if (nlines < dew->minlines) {
            /* Release the line sets; they were leaked on this path */
        ptaaDestroy(&ptaa1);
        ptaaDestroy(&ptaa2);
        return ERROR_INT("insufficient lines to build model", procName, 1);
    }

        /* Do quadratic fit to smooth each line.  A single quadratic
         * over the entire width of the line appears to be sufficient.
         * Quartics tend to overfit to noise.  Each line is thus
         * represented by three coefficients: c2 * x^2 + c1 * x + c0.
         * Using the coefficients, sample each fitted curve uniformly
         * across the full width of the image.  */
    sampling = dew->sampling;
    nx = dew->nx;
    ny = dew->ny;
    ptaa3 = ptaaCreate(nlines);
    nacurve = numaCreate(nlines);  /* stores curvature coeff c2 */
    for (i = 0; i < nlines; i++) {  /* for each line */
        pta = ptaaGetPta(ptaa2, i, L_CLONE);
        ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
        numaAddNumber(nacurve, c2);
        ptad = ptaCreate(nx);
        for (j = 0; j < nx; j++) {  /* uniformly sampled in x */
             x = j * sampling;
             applyQuadraticFit(c2, c1, c0, x, &y);
             ptaAddPt(ptad, x, y);
        }
        ptaaAddPta(ptaa3, ptad, L_INSERT);
        ptaDestroy(&pta);
    }
    if (debugflag) {
            /* Render the fitted curves at the original x locations */
        ptaa4 = ptaaCreate(nlines);
        for (i = 0; i < nlines; i++) {
            pta = ptaaGetPta(ptaa2, i, L_CLONE);
            ptaGetArrays(pta, &nax, NULL);
            ptaGetQuadraticLSF(pta, NULL, NULL, NULL, &nafit);
            ptad = ptaCreateFromNuma(nax, nafit);
            ptaaAddPta(ptaa4, ptad, L_INSERT);
            ptaDestroy(&pta);
            numaDestroy(&nax);
            numaDestroy(&nafit);
        }
        pixt1 = pixConvertTo32(pixs);
        pixt2 = pixDisplayPtaa(pixt1, ptaa4);
        pixWriteTempfile("/tmp", "lines3.png", pixt2, IFF_PNG, NULL);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
        ptaaDestroy(&ptaa4);
    }

        /* Find and save the flat points in each curve. */
    naflat = numaCreate(nlines);
    for (i = 0; i < nlines; i++) {
        pta = ptaaGetPta(ptaa3, i, L_CLONE);
        numaGetFValue(nacurve, i, &c2);
        if (c2 <= 0)  /* flat point at bottom; max value of y in curve */
            ptaGetRange(pta, NULL, NULL, NULL, &flaty);
        else  /* flat point at top; min value of y in curve */
            ptaGetRange(pta, NULL, NULL, &flaty, NULL);
        numaAddNumber(naflat, flaty);
        ptaDestroy(&pta);
    }

        /* Sort the lines in ptaa3 by their position; the sorted
         * flat points and curvatures are handed over to dew. */
    naflatsi = numaGetSortIndex(naflat, L_SORT_INCREASING);
    naflats = numaSortByIndex(naflat, naflatsi);
    nacurves = numaSortByIndex(nacurve, naflatsi);
    dew->naflats = naflats;
    dew->nacurves = nacurves;
    ptaa4 = ptaaSortByIndex(ptaa3, naflatsi);
    numaDestroy(&naflat);
    numaDestroy(&nacurve);
    numaDestroy(&naflatsi);
    if (debugflag) {
        tempname = genTempFilename("/tmp", "naflats.na", 0);
        numaWrite(tempname, naflats);
        FREE(tempname);
    }

        /* Convert the sorted sampled points in ptaa4 to a sampled
         * disparity with respect to the flat point in the curve. */
    ptaa5 = ptaaCreate(nlines);
    for (i = 0; i < nlines; i++) {
        pta = ptaaGetPta(ptaa4, i, L_CLONE);
        numaGetFValue(naflats, i, &flaty);
        ptad = ptaCreate(nx);
        for (j = 0; j < nx; j++) {
            ptaGetPt(pta, j, &x, &y);
            ptaAddPt(ptad, x, flaty - y);
        }
        ptaaAddPta(ptaa5, ptad, L_INSERT);
        ptaDestroy(&pta);
    }
    if (debugflag) {
        tempname = genTempFilename("/tmp", "ptaa5.ptaa", 0);
        ptaaWrite(tempname, ptaa5, 0);
        FREE(tempname);
    }

        /* Generate a ptaa taking vertical 'columns' from ptaa5.
         * We want to fit the vertical disparity on the column to the
         * vertical position of the line, which we call 'y' here and
         * obtain from naflats. */
    ptaa6 = ptaaCreate(nx);
    faflats = numaGetFArray(naflats, L_NOCOPY);  /* not owned; do not free */
    for (j = 0; j < nx; j++) {
        pta = ptaCreate(nlines);
        for (i = 0; i < nlines; i++) {
            y = faflats[i];
            ptaaGetPt(ptaa5, i, j, NULL, &val);  /* disparity value */
            ptaAddPt(pta, y, val);
        }
        ptaaAddPta(ptaa6, pta, L_INSERT);
    }
    if (debugflag) {
        tempname = genTempFilename("/tmp", "ptaa6.ptaa", 0);
        ptaaWrite(tempname, ptaa6, 0);
        FREE(tempname);
    }

        /* Do quadratic fit vertically on a subset of pixel columns
         * for the vertical displacement, which identifies the
         * src pixel(s) for each dest pixel.  Sample the displacement
         * on a regular grid in the vertical direction.   */
    ptaa7 = ptaaCreate(nx);  /* uniformly sampled across full height of image */
    for (j = 0; j < nx; j++) {  /* for each column */
        pta = ptaaGetPta(ptaa6, j, L_CLONE);
        ptaGetQuadraticLSF(pta, &c2, &c1, &c0, NULL);
        ptad = ptaCreate(ny);
        for (i = 0; i < ny; i++) {  /* uniformly sampled in y */
             y = i * sampling;
             applyQuadraticFit(c2, c1, c0, y, &val);
             ptaAddPt(ptad, y, val);
        }
        ptaaAddPta(ptaa7, ptad, L_INSERT);
        ptaDestroy(&pta);
    }
    if (debugflag) {
        tempname = genTempFilename("/tmp", "ptaa7.ptaa", 0);
        ptaaWrite(tempname, ptaa7, 0);
        FREE(tempname);
    }

        /* Save the result in a fpix at the specified subsampling  */
    fpix1 = fpixCreate(nx, ny);
    for (i = 0; i < ny; i++) {
        for (j = 0; j < nx; j++) {
            ptaaGetPt(ptaa7, j, i, NULL, &val);
            fpixSetPixel(fpix1, j, i, val);
        }
    }
    dew->sampvdispar = fpix1;

        /* Generate a full res fpix for vertical dewarping.  We require that
         * the size of this fpix is at least as big as the input image. */
    fpix2 = fpixScaleByInteger(fpix1, sampling);
    dew->fullvdispar = fpix2;
    if (debugflag) {
        pixt1 = fpixRenderContours(fpix2, -2., 2.0, 0.2);
        pixWriteTempfile("/tmp", "vert-contours.png", pixt1, IFF_PNG, NULL);
        pixDisplay(pixt1, 1000, 0);
        pixDestroy(&pixt1);
    }

        /* Generate full res and sampled fpix for horizontal dewarping.  This
         * works to the extent that the line curvature is due to bending
         * out of the plane normal to the camera, and not wide-angle
         * "fishbowl" distortion.  Also generate the sampled horizontal
         * disparity array. */
    if (dew->applyhoriz) {
        fpix3 = fpixBuildHorizontalDisparity(fpix2, 0, &dew->extraw);
        dew->fullhdispar = fpix3;
        dew->samphdispar = fpixSampledDisparity(fpix3, dew->sampling);
        if (debugflag) {
            pixt1 = fpixRenderContours(fpix3, -2., 2.0, 0.2);
            pixWriteTempfile("/tmp", "horiz-contours.png", pixt1,
                             IFF_PNG, NULL);
            pixDisplay(pixt1, 1000, 0);
            pixDestroy(&pixt1);
        }
    }

    dew->success = 1;

    ptaaDestroy(&ptaa1);
    ptaaDestroy(&ptaa2);
    ptaaDestroy(&ptaa3);
    ptaaDestroy(&ptaa4);
    ptaaDestroy(&ptaa5);
    ptaaDestroy(&ptaa6);
    ptaaDestroy(&ptaa7);
    return 0;
}

int main(int argc, char **argv) { l_int32 i, j, w, h, same; l_float32 t, t1, t2; GPLOT *gplot; NUMA *nax, *nay1, *nay2; PIX *pixs, *pixd, *pixt1, *pixt2, *pixt3, *pixt4; PIXA *pixa; static char mainName[] = "rank_reg"; if (argc != 1) return ERROR_INT(" Syntax: rank_reg", mainName, 1); if ((pixs = pixRead("lucasta.150.jpg")) == NULL) return ERROR_INT("pixs not made", mainName, 1); pixGetDimensions(pixs, &w, &h, NULL); startTimer(); pixd = pixRankFilterGray(pixs, 15, 15, 0.4); t = stopTimer(); fprintf(stderr, "Time = %7.3f sec\n", t); fprintf(stderr, "MPix/sec: %7.3f\n", 0.000001 * w * h / t); pixDisplay(pixs, 0, 200); pixDisplay(pixd, 600, 200); pixWrite("/tmp/filter.png", pixd, IFF_PNG); pixDestroy(&pixd); /* Get results for dilation */ startTimer(); pixt1 = pixDilateGray(pixs, 15, 15); t = stopTimer(); fprintf(stderr, "Dilation time = %7.3f sec\n", t); /* Get results for erosion */ pixt2 = pixErodeGray(pixs, 15, 15); /* Get results using the rank filter for rank = 0.0 and 1.0. * Don't use 0.0 or 1.0, because those are dispatched * automatically to erosion and dilation! 
*/ pixt3 = pixRankFilterGray(pixs, 15, 15, 0.0001); pixt4 = pixRankFilterGray(pixs, 15, 15, 0.9999); /* Compare */ pixEqual(pixt1, pixt4, &same); if (same) fprintf(stderr, "Correct: dilation results same as rank 1.0\n"); else fprintf(stderr, "Error: dilation results differ from rank 1.0\n"); pixEqual(pixt2, pixt3, &same); if (same) fprintf(stderr, "Correct: erosion results same as rank 0.0\n"); else fprintf(stderr, "Error: erosion results differ from rank 0.0\n"); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixt3); pixDestroy(&pixt4); fprintf(stderr, "\n----------------------------------------\n"); fprintf(stderr, "The next part takes about 30 seconds\n"); fprintf(stderr, "----------------------------------------\n\n"); nax = numaMakeSequence(1, 1, SIZE); nay1 = numaCreate(SIZE); nay2 = numaCreate(SIZE); gplot = gplotCreate("/tmp/rankroot", GPLOT_X11, "sec/MPix vs filter size", "size", "time"); for (i = 1; i <= SIZE; i++) { t1 = t2 = 0.0; for (j = 0; j < 5; j++) { startTimer(); pixt1 = pixRankFilterGray(pixs, i, SIZE + 1, 0.5); t1 += stopTimer(); pixDestroy(&pixt1); startTimer(); pixt1 = pixRankFilterGray(pixs, SIZE + 1, i, 0.5); t2 += stopTimer(); if (j == 0) pixDisplayWrite(pixt1, 1); pixDestroy(&pixt1); } numaAddNumber(nay1, 1000000. * t1 / (5. * w * h)); numaAddNumber(nay2, 1000000. * t2 / (5. 
* w * h)); } gplotAddPlot(gplot, nax, nay1, GPLOT_LINES, "vertical"); gplotAddPlot(gplot, nax, nay2, GPLOT_LINES, "horizontal"); gplotMakeOutput(gplot); gplotDestroy(&gplot); /* Display tiled */ pixa = pixaReadFiles("/tmp/display", "file"); pixd = pixaDisplayTiledAndScaled(pixa, 8, 250, 5, 0, 25, 2); pixWrite("/tmp/tiles.jpg", pixd, IFF_JFIF_JPEG); pixDestroy(&pixd); pixaDestroy(&pixa); pixDestroy(&pixs); pixDisplayWrite(NULL, -1); /* clear out */ pixs = pixRead("test8.jpg"); for (i = 1; i <= 4; i++) { pixt1 = pixScaleGrayRank2(pixs, i); pixDisplay(pixt1, 300 * (i - 1), 100); pixDestroy(&pixt1); } pixDestroy(&pixs); pixs = pixRead("test24.jpg"); pixt1 = pixConvertRGBToLuminance(pixs); pixt2 = pixScale(pixt1, 1.5, 1.5); for (i = 1; i <= 4; i++) { for (j = 1; j <= 4; j++) { pixt3 = pixScaleGrayRankCascade(pixt2, i, j, 0, 0); pixDisplayWrite(pixt3, 1); pixDestroy(&pixt3); } } pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixs); pixDisplayMultiple("/tmp/display/file*"); return 0; }

int main(int argc, char **argv) { l_int32 i; BOXA *boxa; NUMA *nas, *nab; PIX *pixs; PIXA *pixa, *pixas; /* ----------------- Custom with a few large pix -----------------*/ /* Set up pms */ nas = numaCreate(4); /* small */ numaAddNumber(nas, 5); numaAddNumber(nas, 4); numaAddNumber(nas, 3); numaAddNumber(nas, 2); setPixMemoryManager(pmsCustomAlloc, pmsCustomDealloc); pmsCreate(200000, 400000, nas, "/tmp/junk1.log"); /* Make the pix and do successive copies and removals of the copies */ pixas = GenerateSetOfMargePix(); startTimer(); for (i = 0; i < ntimes; i++) CopyStoreClean(pixas, nlevels, ncopies); fprintf(stderr, "Time (big pix; custom) = %7.3f sec\n", stopTimer()); /* Clean up */ numaDestroy(&nas); pixaDestroy(&pixas); pmsDestroy(); /* ----------------- Standard with a few large pix -----------------*/ setPixMemoryManager(malloc, free); /* Make the pix and do successive copies and removals of the copies */ startTimer(); pixas = GenerateSetOfMargePix(); for (i = 0; i < ntimes; i++) CopyStoreClean(pixas, nlevels, ncopies); fprintf(stderr, "Time (big pix; standard) = %7.3f sec\n", stopTimer()); pixaDestroy(&pixas); /* ----------------- Custom with many small pix -----------------*/ /* Set up pms */ nab = numaCreate(10); numaAddNumber(nab, 2000); numaAddNumber(nab, 2000); numaAddNumber(nab, 2000); numaAddNumber(nab, 500); numaAddNumber(nab, 100); numaAddNumber(nab, 100); numaAddNumber(nab, 100); setPixMemoryManager(pmsCustomAlloc, pmsCustomDealloc); if (logging) /* use logging == 0 for speed comparison */ pmsCreate(20, 40, nab, "/tmp/junk2.log"); else pmsCreate(20, 40, nab, NULL); pixs = pixRead("feyn.tif"); startTimer(); for (i = 0; i < 5; i++) { boxa = pixConnComp(pixs, &pixa, 8); boxaDestroy(&boxa); pixaDestroy(&pixa); } numaDestroy(&nab); pixDestroy(&pixs); pmsDestroy(); fprintf(stderr, "Time (custom) = %7.3f sec\n", stopTimer()); /* ----------------- Standard with many small pix -----------------*/ setPixMemoryManager(malloc, free); pixs = 
pixRead("feyn.tif"); startTimer(); for (i = 0; i < 5; i++) { boxa = pixConnComp(pixs, &pixa, 8); boxaDestroy(&boxa); pixaDestroy(&pixa); } pixDestroy(&pixs); fprintf(stderr, "Time (standard) = %7.3f sec\n", stopTimer()); return 0; }

int main(int argc, char **argv) { l_int32 size, i, n, n0; BOXA *boxa; GPLOT *gplot; NUMA *nax, *nay1, *nay2; PIX *pixs, *pixd; static char mainName[] = "pixa1_reg"; if (argc != 1) return ERROR_INT(" Syntax: pixa1_reg", mainName, 1); if ((pixs = pixRead("feyn.tif")) == NULL) return ERROR_INT("pixs not made", mainName, 1); /* ---------------- Remove small components --------------- */ boxa = pixConnComp(pixs, NULL, 8); n0 = boxaGetCount(boxa); nax = numaMakeSequence(0, 2, 51); nay1 = numaCreate(51); nay2 = numaCreate(51); boxaDestroy(&boxa); fprintf(stderr, "\n Select Large if Both\n"); fprintf(stderr, "Iter 0: n = %d\n", n0); numaAddNumber(nay1, n0); for (i = 1; i <= 50; i++) { size = 2 * i; pixd = pixSelectBySize(pixs, size, size, CONNECTIVITY, L_SELECT_IF_BOTH, L_SELECT_IF_GTE, NULL); boxa = pixConnComp(pixd, NULL, 8); n = boxaGetCount(boxa); numaAddNumber(nay1, n); fprintf(stderr, "Iter %d: n = %d\n", i, n); boxaDestroy(&boxa); pixDestroy(&pixd); } fprintf(stderr, "\n Select Large if Either\n"); fprintf(stderr, "Iter 0: n = %d\n", n0); numaAddNumber(nay2, n0); for (i = 1; i <= 50; i++) { size = 2 * i; pixd = pixSelectBySize(pixs, size, size, CONNECTIVITY, L_SELECT_IF_EITHER, L_SELECT_IF_GTE, NULL); boxa = pixConnComp(pixd, NULL, 8); n = boxaGetCount(boxa); numaAddNumber(nay2, n); fprintf(stderr, "Iter %d: n = %d\n", i, n); boxaDestroy(&boxa); pixDestroy(&pixd); } gplot = gplotCreate("/tmp/junkroot1", GPLOT_X11, "Select large: number of cc vs size removed", "min size", "number of c.c."); gplotAddPlot(gplot, nax, nay1, GPLOT_LINES, "select if both"); gplotAddPlot(gplot, nax, nay2, GPLOT_LINES, "select if either"); gplotMakeOutput(gplot); gplotDestroy(&gplot); /* ---------------- Remove large components --------------- */ numaEmpty(nay1); numaEmpty(nay2); fprintf(stderr, "\n Select Small if Both\n"); fprintf(stderr, "Iter 0: n = %d\n", 0); numaAddNumber(nay1, 0); for (i = 1; i <= 50; i++) { size = 2 * i; pixd = pixSelectBySize(pixs, size, size, CONNECTIVITY, 
L_SELECT_IF_BOTH, L_SELECT_IF_LTE, NULL); boxa = pixConnComp(pixd, NULL, 8); n = boxaGetCount(boxa); numaAddNumber(nay1, n); fprintf(stderr, "Iter %d: n = %d\n", i, n); boxaDestroy(&boxa); pixDestroy(&pixd); } fprintf(stderr, "\n Select Small if Either\n"); fprintf(stderr, "Iter 0: n = %d\n", 0); numaAddNumber(nay2, 0); for (i = 1; i <= 50; i++) { size = 2 * i; pixd = pixSelectBySize(pixs, size, size, CONNECTIVITY, L_SELECT_IF_EITHER, L_SELECT_IF_LTE, NULL); boxa = pixConnComp(pixd, NULL, 8); n = boxaGetCount(boxa); numaAddNumber(nay2, n); fprintf(stderr, "Iter %d: n = %d\n", i, n); boxaDestroy(&boxa); pixDestroy(&pixd); } gplot = gplotCreate("/tmp/junkroot2", GPLOT_X11, "Remove large: number of cc vs size removed", "min size", "number of c.c."); gplotAddPlot(gplot, nax, nay1, GPLOT_LINES, "select if both"); gplotAddPlot(gplot, nax, nay2, GPLOT_LINES, "select if either"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nay1); numaDestroy(&nay2); pixDestroy(&pixs); return 0; }

main(int argc, char **argv) { l_int32 i, n, w, h, success, display; FILE *fp; BOXA *boxa; NUMA *naindex, *naw, *nah, *naw_med, *nah_med; PIX *pixs, *pixt, *pixd; if (regTestSetup(argc, argv, &fp, &display, &success, NULL)) return 1; /* Generate arrays of word widths and heights */ pixs = pixRead("feyn.tif"); pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex); n = boxaGetCount(boxa); naw = numaCreate(0); nah = numaCreate(0); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); numaAddNumber(naw, w); numaAddNumber(nah, h); } boxaDestroy(&boxa); numaDestroy(&naindex); /* Make the rank bin arrays of median values, with 10 bins */ numaGetRankBinValues(naw, 10, NULL, &naw_med); numaGetRankBinValues(nah, 10, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_10bin", "width vs rank bins (10)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_10bin", "height vs rank bins (10)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Make the rank bin arrays of median values, with 30 bins */ numaGetRankBinValues(naw, 30, NULL, &naw_med); numaGetRankBinValues(nah, 30, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_30bin", "width vs rank bins (30)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_30bin", "height vs rank bins (30)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Give gnuplot time to write out the files */ #ifndef _WIN32 sleep(2); #else Sleep(2000); #endif /* _WIN32 */ /* Save as golden files, or check against them */ regTestCheckFile(fp, argv, "/tmp/w_10bin.png", 0, &success); regTestCheckFile(fp, argv, "/tmp/h_10bin.png", 1, &success); regTestCheckFile(fp, argv, "/tmp/w_30bin.png", 2, &success); regTestCheckFile(fp, argv, "/tmp/h_30bin.png", 3, &success); /* Display results for debugging */ pixt = pixRead("/tmp/w_10bin.png"); pixDisplayWithTitle(pixt, 0, 0, NULL, display); pixDestroy(&pixt); pixt = pixRead("/tmp/h_10bin.png"); pixDisplayWithTitle(pixt, 650, 0, NULL, display); pixDestroy(&pixt); pixt = 
pixRead("/tmp/w_30bin.png"); pixDisplayWithTitle(pixt, 0, 550, NULL, display); pixDestroy(&pixt); pixt = pixRead("/tmp/h_30bin.png"); pixDisplayWithTitle(pixt, 650, 550, NULL, display); pixDestroy(&pixt); pixDestroy(&pixs); numaDestroy(&naw); numaDestroy(&nah); regTestCleanup(argc, argv, fp, success, NULL); return 0; }

/*! * \brief pixGetLocalSkewAngles() * * \param[in] pixs 1 bpp * \param[in] nslices the number of horizontal overlapping slices; must * be larger than 1 and not exceed 20; 0 for default * \param[in] redsweep sweep reduction factor: 1, 2, 4 or 8; * use 0 for default value * \param[in] redsearch search reduction factor: 1, 2, 4 or 8, and not * larger than redsweep; use 0 for default value * \param[in] sweeprange half the full range, assumed about 0; in degrees; * use 0.0 for default value * \param[in] sweepdelta angle increment of sweep; in degrees; * use 0.0 for default value * \param[in] minbsdelta min binary search increment angle; in degrees; * use 0.0 for default value * \param[out] pa [optional] slope of skew as fctn of y * \param[out] pb [optional] intercept at y=0 of skew as fctn of y * \param[in] debug 1 for generating plot of skew angle vs. y; 0 otherwise * \return naskew, or NULL on error * * <pre> * Notes: * (1) The local skew is measured in a set of overlapping strips. * We then do a least square linear fit parameters to get * the slope and intercept parameters a and b in * skew-angle = a * y + b (degrees) * for the local skew as a function of raster line y. * This is then used to make naskew, which can be interpreted * as the computed skew angle (in degrees) at the left edge * of each raster line. * (2) naskew can then be used to find the baselines of text, because * each text line has a baseline that should intersect * the left edge of the image with the angle given by this * array, evaluated at the raster line of intersection. 
* </pre> */ NUMA * pixGetLocalSkewAngles(PIX *pixs, l_int32 nslices, l_int32 redsweep, l_int32 redsearch, l_float32 sweeprange, l_float32 sweepdelta, l_float32 minbsdelta, l_float32 *pa, l_float32 *pb, l_int32 debug) { l_int32 w, h, hs, i, ystart, yend, ovlap, npts; l_float32 angle, conf, ycenter, a, b; BOX *box; GPLOT *gplot; NUMA *naskew, *nax, *nay; PIX *pix; PTA *pta; PROCNAME("pixGetLocalSkewAngles"); if (!pixs || pixGetDepth(pixs) != 1) return (NUMA *)ERROR_PTR("pixs undefined or not 1 bpp", procName, NULL); if (nslices < 2 || nslices > 20) nslices = DEFAULT_SLICES; if (redsweep < 1 || redsweep > 8) redsweep = DEFAULT_SWEEP_REDUCTION; if (redsearch < 1 || redsearch > redsweep) redsearch = DEFAULT_BS_REDUCTION; if (sweeprange == 0.0) sweeprange = DEFAULT_SWEEP_RANGE; if (sweepdelta == 0.0) sweepdelta = DEFAULT_SWEEP_DELTA; if (minbsdelta == 0.0) minbsdelta = DEFAULT_MINBS_DELTA; pixGetDimensions(pixs, &w, &h, NULL); hs = h / nslices; ovlap = (l_int32)(OVERLAP_FRACTION * hs); pta = ptaCreate(nslices); for (i = 0; i < nslices; i++) { ystart = L_MAX(0, hs * i - ovlap); yend = L_MIN(h - 1, hs * (i + 1) + ovlap); ycenter = (ystart + yend) / 2; box = boxCreate(0, ystart, w, yend - ystart + 1); pix = pixClipRectangle(pixs, box, NULL); pixFindSkewSweepAndSearch(pix, &angle, &conf, redsweep, redsearch, sweeprange, sweepdelta, minbsdelta); if (conf > MIN_ALLOWED_CONFIDENCE) ptaAddPt(pta, ycenter, angle); pixDestroy(&pix); boxDestroy(&box); } /* Do linear least squares fit */ if ((npts = ptaGetCount(pta)) < 2) { ptaDestroy(&pta); return (NUMA *)ERROR_PTR("can't fit skew", procName, NULL); } ptaGetLinearLSF(pta, &a, &b, NULL); if (pa) *pa = a; if (pb) *pb = b; /* Make skew angle array as function of raster line */ naskew = numaCreate(h); for (i = 0; i < h; i++) { angle = a * i + b; numaAddNumber(naskew, angle); } if (debug) { lept_mkdir("lept/baseline"); ptaGetArrays(pta, &nax, &nay); gplot = gplotCreate("/tmp/lept/baseline/skew", GPLOT_PNG, "skew as fctn of y", "y (in 
raster lines from top)", "angle (in degrees)"); gplotAddPlot(gplot, NULL, naskew, GPLOT_POINTS, "linear lsf"); gplotAddPlot(gplot, nax, nay, GPLOT_POINTS, "actual data pts"); gplotMakeOutput(gplot); gplotDestroy(&gplot); numaDestroy(&nax); numaDestroy(&nay); } ptaDestroy(&pta); return naskew; }

/*! * pixaDisplayTiledInRows() * * Input: pixa * outdepth (output depth: 1, 8 or 32 bpp) * maxwidth (of output image) * scalefactor (applied to every pix; use 1.0 for no scaling) * background (0 for white, 1 for black; this is the color * of the spacing between the images) * spacing (between images, and on outside) * border (width of black border added to each image; * use 0 for no border) * Return: pixd (of tiled images), or null on error * * Notes: * (1) This saves a pixa to a single image file of width not to * exceed maxwidth, with background color either white or black, * and with each row tiled such that the top of each pix is * aligned and separated by 'spacing' from the next one. * A black border can be added to each pix. * (2) All pix are converted to outdepth; existing colormaps are removed. * (3) This does a reasonably spacewise-efficient job of laying * out the individual pix images into a tiled composite. */ PIX * pixaDisplayTiledInRows(PIXA *pixa, l_int32 outdepth, l_int32 maxwidth, l_float32 scalefactor, l_int32 background, l_int32 spacing, l_int32 border) { l_int32 h; /* cumulative height over all the rows */ l_int32 w; /* cumulative height in the current row */ l_int32 bordval, wtry, wt, ht; l_int32 irow; /* index of current pix in current row */ l_int32 wmaxrow; /* width of the largest row */ l_int32 maxh; /* max height in row */ l_int32 i, j, index, n, x, y, nrows, ninrow; NUMA *nainrow; /* number of pix in the row */ NUMA *namaxh; /* height of max pix in the row */ PIX *pix, *pixn, *pixt, *pixd; PIXA *pixan; PROCNAME("pixaDisplayTiledInRows"); if (!pixa) return (PIX *)ERROR_PTR("pixa not defined", procName, NULL); if (outdepth != 1 && outdepth != 8 && outdepth != 32) return (PIX *)ERROR_PTR("outdepth not in {1, 8, 32}", procName, NULL); if (border < 0) border = 0; if (scalefactor <= 0.0) scalefactor = 1.0; if ((n = pixaGetCount(pixa)) == 0) return (PIX *)ERROR_PTR("no components", procName, NULL); /* Normalize depths, scale, remove colormaps; 
optionally add border */ pixan = pixaCreate(n); bordval = (outdepth == 1) ? 1 : 0; for (i = 0; i < n; i++) { if ((pix = pixaGetPix(pixa, i, L_CLONE)) == NULL) continue; if (outdepth == 1) pixn = pixConvertTo1(pix, 128); else if (outdepth == 8) pixn = pixConvertTo8(pix, FALSE); else /* outdepth == 32 */ pixn = pixConvertTo32(pix); pixDestroy(&pix); if (scalefactor != 1.0) pixt = pixScale(pixn, scalefactor, scalefactor); else pixt = pixClone(pixn); if (border) pixd = pixAddBorder(pixt, border, bordval); else pixd = pixClone(pixt); pixDestroy(&pixn); pixDestroy(&pixt); pixaAddPix(pixan, pixd, L_INSERT); } if (pixaGetCount(pixan) != n) { n = pixaGetCount(pixan); L_WARNING_INT("only got %d components", procName, n); if (n == 0) { pixaDestroy(&pixan); return (PIX *)ERROR_PTR("no components", procName, NULL); } } /* Compute parameters for layout */ nainrow = numaCreate(0); namaxh = numaCreate(0); wmaxrow = 0; w = h = spacing; maxh = 0; /* max height in row */ for (i = 0, irow = 0; i < n; i++, irow++) { pixaGetPixDimensions(pixan, i, &wt, &ht, NULL); wtry = w + wt + spacing; if (wtry > maxwidth) { /* end the current row and start next one */ numaAddNumber(nainrow, irow); numaAddNumber(namaxh, maxh); wmaxrow = L_MAX(wmaxrow, w); h += maxh + spacing; irow = 0; w = wt + 2 * spacing; maxh = ht; } else { w = wtry; maxh = L_MAX(maxh, ht); } } /* Enter the parameters for the last row */ numaAddNumber(nainrow, irow); numaAddNumber(namaxh, maxh); wmaxrow = L_MAX(wmaxrow, w); h += maxh + spacing; if ((pixd = pixCreate(wmaxrow, h, outdepth)) == NULL) { numaDestroy(&nainrow); numaDestroy(&namaxh); pixaDestroy(&pixan); return (PIX *)ERROR_PTR("pixd not made", procName, NULL); } /* Reset the background color if necessary */ if ((background == 1 && outdepth == 1) || (background == 0 && outdepth != 1)) pixSetAll(pixd); /* Blit the images to the dest */ nrows = numaGetCount(nainrow); y = spacing; for (i = 0, index = 0; i < nrows; i++) { /* over rows */ numaGetIValue(nainrow, i, &ninrow); 
numaGetIValue(namaxh, i, &maxh); x = spacing; for (j = 0; j < ninrow; j++, index++) { /* over pix in row */ pix = pixaGetPix(pixan, index, L_CLONE); pixGetDimensions(pix, &wt, &ht, NULL); pixRasterop(pixd, x, y, wt, ht, PIX_SRC, pix, 0, 0); pixDestroy(&pix); x += wt + spacing; } y += maxh + spacing; } numaDestroy(&nainrow); numaDestroy(&namaxh); pixaDestroy(&pixan); return pixd; }

/*! * kernelCreateFromFile() * * Input: filename * Return: kernel, or null on error * * Notes: * (1) The file contains, in the following order: * - Any number of comment lines starting with '#' are ignored * - The height and width of the kernel * - The y and x values of the kernel origin * - The kernel data, formatted as lines of numbers (integers * or floats) for the kernel values in row-major order, * and with no other punctuation. * (Note: this differs from kernelCreateFromString(), * where each line must begin and end with a double-quote * to tell the compiler it's part of a string.) * - The kernel specification ends when a blank line, * a comment line, or the end of file is reached. * (2) All lines must be left-justified. * (3) See kernelCreateFromString() for a description of the string * format for the kernel data. As an example, here are the lines * of a valid kernel description file In the file, all lines * are left-justified: * # small 3x3 kernel * 3 3 * 1 1 * 25.5 51 24.3 * 70.2 146.3 73.4 * 20 50.9 18.4 */ L_KERNEL * kernelCreateFromFile(const char *filename) { char *filestr, *line; l_int32 nlines, i, j, first, index, w, h, cx, cy, n; l_float32 val; size_t size; NUMA *na, *nat; SARRAY *sa; L_KERNEL *kel; PROCNAME("kernelCreateFromFile"); if (!filename) return (L_KERNEL *)ERROR_PTR("filename not defined", procName, NULL); filestr = (char *)l_binaryRead(filename, &size); sa = sarrayCreateLinesFromString(filestr, 1); FREE(filestr); nlines = sarrayGetCount(sa); /* Find the first data line. */ for (i = 0; i < nlines; i++) { line = sarrayGetString(sa, i, L_NOCOPY); if (line[0] != '#') { first = i; break; } } /* Find the kernel dimensions and origin location. 
*/ line = sarrayGetString(sa, first, L_NOCOPY); if (sscanf(line, "%d %d", &h, &w) != 2) return (L_KERNEL *)ERROR_PTR("error reading h,w", procName, NULL); line = sarrayGetString(sa, first + 1, L_NOCOPY); if (sscanf(line, "%d %d", &cy, &cx) != 2) return (L_KERNEL *)ERROR_PTR("error reading cy,cx", procName, NULL); /* Extract the data. This ends when we reach eof, or when we * encounter a line of data that is either a null string or * contains just a newline. */ na = numaCreate(0); for (i = first + 2; i < nlines; i++) { line = sarrayGetString(sa, i, L_NOCOPY); if (line[0] == '\0' || line[0] == '\n' || line[0] == '#') break; nat = parseStringForNumbers(line, " \t\n"); numaJoin(na, nat, 0, -1); numaDestroy(&nat); } sarrayDestroy(&sa); n = numaGetCount(na); if (n != w * h) { numaDestroy(&na); fprintf(stderr, "w = %d, h = %d, num ints = %d\n", w, h, n); return (L_KERNEL *)ERROR_PTR("invalid integer data", procName, NULL); } kel = kernelCreate(h, w); kernelSetOrigin(kel, cy, cx); index = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { numaGetFValue(na, index, &val); kernelSetElement(kel, i, j, val); index++; } } numaDestroy(&na); return kel; }

/*! * pixaSort() * * Input: pixas * sorttype (L_SORT_BY_X, L_SORT_BY_Y, L_SORT_BY_WIDTH, * L_SORT_BY_HEIGHT, L_SORT_BY_MIN_DIMENSION, * L_SORT_BY_MAX_DIMENSION, L_SORT_BY_PERIMETER, * L_SORT_BY_AREA, L_SORT_BY_ASPECT_RATIO) * sortorder (L_SORT_INCREASING, L_SORT_DECREASING) * &naindex (<optional return> index of sorted order into * original array) * copyflag (L_COPY, L_CLONE) * Return: pixad (sorted version of pixas), or null on error * * Notes: * (1) This sorts based on the data in the boxa. If the boxa * count is not the same as the pixa count, this returns an error. * (2) The copyflag refers to the pix and box copies that are * inserted into the sorted pixa. These are either L_COPY * or L_CLONE. */ PIXA * pixaSort(PIXA *pixas, l_int32 sorttype, l_int32 sortorder, NUMA **pnaindex, l_int32 copyflag) { l_int32 i, n, x, y, w, h; BOXA *boxa; NUMA *na, *naindex; PIXA *pixad; PROCNAME("pixaSort"); if (pnaindex) *pnaindex = NULL; if (!pixas) return (PIXA *)ERROR_PTR("pixas not defined", procName, NULL); if (sorttype != L_SORT_BY_X && sorttype != L_SORT_BY_Y && sorttype != L_SORT_BY_WIDTH && sorttype != L_SORT_BY_HEIGHT && sorttype != L_SORT_BY_MIN_DIMENSION && sorttype != L_SORT_BY_MAX_DIMENSION && sorttype != L_SORT_BY_PERIMETER && sorttype != L_SORT_BY_AREA && sorttype != L_SORT_BY_ASPECT_RATIO) return (PIXA *)ERROR_PTR("invalid sort type", procName, NULL); if (sortorder != L_SORT_INCREASING && sortorder != L_SORT_DECREASING) return (PIXA *)ERROR_PTR("invalid sort order", procName, NULL); if (copyflag != L_COPY && copyflag != L_CLONE) return (PIXA *)ERROR_PTR("invalid copy flag", procName, NULL); if ((boxa = pixas->boxa) == NULL) /* not owned; do not destroy */ return (PIXA *)ERROR_PTR("boxa not found", procName, NULL); n = pixaGetCount(pixas); if (boxaGetCount(boxa) != n) return (PIXA *)ERROR_PTR("boxa and pixa counts differ", procName, NULL); /* Use O(n) binsort if possible */ if (n > MIN_COMPS_FOR_BIN_SORT && ((sorttype == L_SORT_BY_X) || (sorttype == L_SORT_BY_Y) 
|| (sorttype == L_SORT_BY_WIDTH) || (sorttype == L_SORT_BY_HEIGHT) || (sorttype == L_SORT_BY_PERIMETER))) return pixaBinSort(pixas, sorttype, sortorder, pnaindex, copyflag); /* Build up numa of specific data */ if ((na = numaCreate(n)) == NULL) return (PIXA *)ERROR_PTR("na not made", procName, NULL); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxa, i, &x, &y, &w, &h); switch (sorttype) { case L_SORT_BY_X: numaAddNumber(na, x); break; case L_SORT_BY_Y: numaAddNumber(na, y); break; case L_SORT_BY_WIDTH: numaAddNumber(na, w); break; case L_SORT_BY_HEIGHT: numaAddNumber(na, h); break; case L_SORT_BY_MIN_DIMENSION: numaAddNumber(na, L_MIN(w, h)); break; case L_SORT_BY_MAX_DIMENSION: numaAddNumber(na, L_MAX(w, h)); break; case L_SORT_BY_PERIMETER: numaAddNumber(na, w + h); break; case L_SORT_BY_AREA: numaAddNumber(na, w * h); break; case L_SORT_BY_ASPECT_RATIO: numaAddNumber(na, (l_float32)w / (l_float32)h); break; default: L_WARNING("invalid sort type", procName); } } /* Get the sort index for data array */ if ((naindex = numaGetSortIndex(na, sortorder)) == NULL) return (PIXA *)ERROR_PTR("naindex not made", procName, NULL); /* Build up sorted pixa using sort index */ if ((pixad = pixaSortByIndex(pixas, naindex, copyflag)) == NULL) return (PIXA *)ERROR_PTR("pixad not made", procName, NULL); if (pnaindex) *pnaindex = naindex; else numaDestroy(&naindex); numaDestroy(&na); return pixad; }