/*! * pixaRemovePix() * * Input: pixa * index (of pix to be removed) * Return: 0 if OK, 1 on error * * Notes: * (1) This shifts pixa[i] --> pixa[i - 1] for all i > index. * (2) It should not be used repeatedly on large arrays, * because the function is O(n). * (3) The corresponding box is removed as well, if it exists. */ l_int32 pixaRemovePix(PIXA *pixa, l_int32 index) { l_int32 i, n, nbox; BOXA *boxa; PIX **array; PROCNAME("pixaRemovePix"); if (!pixa) return ERROR_INT("pixa not defined", procName, 1); n = pixaGetCount(pixa); if (index < 0 || index >= n) return ERROR_INT("index not in {0...n - 1}", procName, 1); /* Remove the pix */ array = pixa->pix; pixDestroy(&array[index]); for (i = index + 1; i < n; i++) array[i - 1] = array[i]; array[n - 1] = NULL; pixa->n--; /* Remove the box if it exists */ boxa = pixa->boxa; nbox = boxaGetCount(boxa); if (index < nbox) boxaRemoveBox(boxa, index); return 0; }
/*! * pixaGenerateFont() * * Input: pix (of 95 characters in 3 rows) * fontsize (4, 6, 8, ... , 20, in pts at 300 ppi) * &bl1 (<return> baseline of row 1) * &bl2 (<return> baseline of row 2) * &bl3 (<return> baseline of row 3) * Return: pixa of font bitmaps for 95 characters, or null on error * * Notes: * (1) This does all the work. See pixaGenerateFontFromFile() * for an overview. * (2) The pix is for one of the 9 fonts. @fontsize is only * used here for debugging. */ PIXA * pixaGenerateFont(PIX *pixs, l_int32 fontsize, l_int32 *pbl0, l_int32 *pbl1, l_int32 *pbl2) { l_int32 i, j, nrows, nrowchars, nchars, h, yval; l_int32 width, height; l_int32 baseline[3]; l_int32 *tab = NULL; BOX *box, *box1, *box2; BOXA *boxar, *boxac, *boxacs; PIX *pix1, *pix2, *pixr, *pixrc, *pixc; PIXA *pixa; l_int32 n, w, inrow, top; l_int32 *ia; NUMA *na; PROCNAME("pixaGenerateFont"); if (!pbl0 || !pbl1 || !pbl2) return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); *pbl0 = *pbl1 = *pbl2 = 0; if (!pixs) return (PIXA *)ERROR_PTR("pixs not defined", procName, NULL); /* Locate the 3 rows of characters */ w = pixGetWidth(pixs); na = pixCountPixelsByRow(pixs, NULL); boxar = boxaCreate(0); n = numaGetCount(na); ia = numaGetIArray(na); inrow = 0; for (i = 0; i < n; i++) { if (!inrow && ia[i] > 0) { inrow = 1; top = i; } else if (inrow && ia[i] == 0) { inrow = 0; box = boxCreate(0, top, w, i - top); boxaAddBox(boxar, box, L_INSERT); } } FREE(ia); numaDestroy(&na); nrows = boxaGetCount(boxar); #if DEBUG_FONT_GEN L_INFO("For fontsize %s, have %d rows\n", procName, fontsize, nrows); #endif /* DEBUG_FONT_GEN */ if (nrows != 3) { L_INFO("nrows = %d; skipping fontsize %d\n", procName, nrows, fontsize); return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL); } /* Grab the character images and baseline data */ #if DEBUG_BASELINE lept_rmdir("baseline"); lept_mkdir("baseline"); #endif /* DEBUG_BASELINE */ tab = makePixelSumTab8(); pixa = pixaCreate(95); for (i = 0; i < nrows; i++) { box = boxaGetBox(boxar, i, L_CLONE); pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ pixGetTextBaseline(pixr, tab, &yval); baseline[i] = yval; #if DEBUG_BASELINE L_INFO("Baseline info: row %d, yval = %d, h = %d\n", procName, i, yval, pixGetHeight(pixr)); pix1 = pixCopy(NULL, pixr); pixRenderLine(pix1, 0, yval, pixGetWidth(pix1), yval, 1, L_FLIP_PIXELS); if (i == 0 ) pixWrite("/tmp/baseline/row0.png", pix1, IFF_PNG); else if (i == 1) pixWrite("/tmp/baseline/row1.png", pix1, IFF_PNG); else pixWrite("/tmp/baseline/row2.png", pix1, IFF_PNG); pixDestroy(&pix1); #endif /* DEBUG_BASELINE */ boxDestroy(&box); pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); boxac = pixConnComp(pixrc, NULL, 8); boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); if (i == 0) { /* consolidate the two components of '"' */ box1 = boxaGetBox(boxacs, 1, L_CLONE); box2 = boxaGetBox(boxacs, 2, L_CLONE); box1->w = box2->x + box2->w - box1->x; /* increase width */ boxDestroy(&box1); boxDestroy(&box2); boxaRemoveBox(boxacs, 2); } h = pixGetHeight(pixr); nrowchars = boxaGetCount(boxacs); for (j = 0; j < nrowchars; j++) { box = boxaGetBox(boxacs, j, L_COPY); if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ boxDestroy(&box); continue; } box->y = 0; box->h = h - 1; pixc = pixClipRectangle(pixr, box, NULL); boxDestroy(&box); if (i == 0 && j == 0) /* add a pix for the space; change later */ pixaAddPix(pixa, pixc, L_COPY); if (i == 2 && j == 0) /* add a pix for the '\'; change later */ pixaAddPix(pixa, pixc, L_COPY); pixaAddPix(pixa, pixc, L_INSERT); } pixDestroy(&pixr); pixDestroy(&pixrc); boxaDestroy(&boxac); boxaDestroy(&boxacs); } FREE(tab); nchars = pixaGetCount(pixa); if (nchars != 95) return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL); *pbl0 = baseline[0]; *pbl1 = baseline[1]; *pbl2 = baseline[2]; /* Fix the space character up; it should have no ON pixels, * and be about twice as wide as the '!' character. */ pix1 = pixaGetPix(pixa, 0, L_CLONE); width = 2 * pixGetWidth(pix1); height = pixGetHeight(pix1); pixDestroy(&pix1); pix1 = pixCreate(width, height, 1); pixaReplacePix(pixa, 0, pix1, NULL); /* Fix up the '\' character; use a LR flip of the '/' char */ pix1 = pixaGetPix(pixa, 15, L_CLONE); pix2 = pixFlipLR(NULL, pix1); pixDestroy(&pix1); pixaReplacePix(pixa, 60, pix2, NULL); #if DEBUG_CHARS pix1 = pixaDisplayTiled(pixa, 1500, 0, 10); pixDisplay(pix1, 100 * i, 200); pixDestroy(&pix1); #endif /* DEBUG_CHARS */ boxaDestroy(&boxar); return pixa; }
/*! * pixaGenerateFont() * * Input: dir (directory holding image of character set) * size (4, 6, 8, ... , 20, in pts at 300 ppi) * &bl1 (<return> baseline of row 1) * &bl2 (<return> baseline of row 2) * &bl3 (<return> baseline of row 3) * Return: pixa of font bitmaps for 95 characters, or null on error * * These font generation functions use 9 sets, each with bitmaps * of 94 ascii characters, all in Palatino-Roman font. * Each input bitmap has 3 rows of characters. The range of * ascii values in each row is as follows: * row 0: 32-57 (32 is a space) * row 1: 58-91 (92, '\', is not represented in this font) * row 2: 93-126 * We LR flip the '/' char to generate a bitmap for the missing * '\' character, so that we have representations of all 95 * printable chars. * * Computation of the bitmaps and baselines for a single * font takes from 40 to 200 msec on a 2 GHz processor, * depending on the size. Use pixaGetFont() to read the * generated character set directly from files that were * produced in prog/genfonts.c using this function. */ PIXA * pixaGenerateFont(const char *dir, l_int32 size, l_int32 *pbl0, l_int32 *pbl1, l_int32 *pbl2) { char *pathname; l_int32 fileno; l_int32 i, j, nrows, nrowchars, nchars, h, yval; l_int32 width, height; l_int32 baseline[3]; l_int32 *tab; BOX *box, *box1, *box2; BOXA *boxar, *boxac, *boxacs; PIX *pixs, *pixt1, *pixt2, *pixt3; PIX *pixr, *pixrc, *pixc; PIXA *pixa; PROCNAME("pixaGenerateFont"); if (!pbl0 || !pbl1 || !pbl2) return (PIXA *)ERROR_PTR("&bl not all defined", procName, NULL); *pbl0 = *pbl1 = *pbl2 = 0; fileno = (size / 2) - 2; if (fileno < 0 || fileno > NFONTS) return (PIXA *)ERROR_PTR("font size invalid", procName, NULL); tab = makePixelSumTab8(); pathname = genPathname(dir, inputfonts[fileno]); if ((pixs = pixRead(pathname)) == NULL) return (PIXA *)ERROR_PTR("pixs not all defined", procName, NULL); FREE(pathname); pixa = pixaCreate(95); pixt1 = pixMorphSequence(pixs, "c1.35 + c101.1", 0); boxar = pixConnComp(pixt1, NULL, 8); /* one box for each row */ pixDestroy(&pixt1); nrows = boxaGetCount(boxar); #if DEBUG_FONT_GEN fprintf(stderr, "For font %s, number of rows is %d\n", inputfonts[fileno], nrows); #endif /* DEBUG_FONT_GEN */ if (nrows != 3) { L_INFO_INT2("nrows = %d; skipping font %d", procName, nrows, fileno); return (PIXA *)ERROR_PTR("3 rows not generated", procName, NULL); } for (i = 0; i < nrows; i++) { box = boxaGetBox(boxar, i, L_CLONE); pixr = pixClipRectangle(pixs, box, NULL); /* row of chars */ pixGetTextBaseline(pixr, tab, &yval); baseline[i] = yval; #if DEBUG_BASELINE { PIX *pixbl; fprintf(stderr, "row %d, yval = %d, h = %d\n", i, yval, pixGetHeight(pixr)); pixbl = pixCopy(NULL, pixr); pixRenderLine(pixbl, 0, yval, pixGetWidth(pixbl), yval, 1, L_FLIP_PIXELS); if (i == 0 ) pixWrite("junktl0", pixbl, IFF_PNG); else if (i == 1) pixWrite("junktl1", pixbl, IFF_PNG); else pixWrite("junktl2", pixbl, IFF_PNG); pixDestroy(&pixbl); } #endif /* DEBUG_BASELINE */ boxDestroy(&box); pixrc = pixCloseSafeBrick(NULL, pixr, 1, 35); boxac = pixConnComp(pixrc, NULL, 8); boxacs = boxaSort(boxac, L_SORT_BY_X, L_SORT_INCREASING, NULL); if (i == 0) { /* consolidate the two components of '"' */ box1 = boxaGetBox(boxacs, 1, L_CLONE); box2 = boxaGetBox(boxacs, 2, L_CLONE); box1->w = box2->x + box2->w - box1->x; /* increase width */ boxDestroy(&box1); boxDestroy(&box2); boxaRemoveBox(boxacs, 2); } h = pixGetHeight(pixr); nrowchars = boxaGetCount(boxacs); for (j = 0; j < nrowchars; j++) { box = boxaGetBox(boxacs, j, L_COPY); if (box->w <= 2 && box->h == 1) { /* skip 1x1, 2x1 components */ boxDestroy(&box); continue; } box->y = 0; box->h = h - 1; pixc = pixClipRectangle(pixr, box, NULL); boxDestroy(&box); if (i == 0 && j == 0) /* add a pix for the space; change later */ pixaAddPix(pixa, pixc, L_COPY); if (i == 2 && j == 0) /* add a pix for the '\'; change later */ pixaAddPix(pixa, pixc, L_COPY); pixaAddPix(pixa, pixc, L_INSERT); } pixDestroy(&pixr); pixDestroy(&pixrc); boxaDestroy(&boxac); boxaDestroy(&boxacs); } nchars = pixaGetCount(pixa); if (nchars != 95) return (PIXA *)ERROR_PTR("95 chars not generated", procName, NULL); *pbl0 = baseline[0]; *pbl1 = baseline[1]; *pbl2 = baseline[2]; /* Fix the space character up; it should have no ON pixels, * and be about twice as wide as the '!' character. */ pixt2 = pixaGetPix(pixa, 0, L_CLONE); width = 2 * pixGetWidth(pixt2); height = pixGetHeight(pixt2); pixDestroy(&pixt2); pixt2 = pixCreate(width, height, 1); pixaReplacePix(pixa, 0, pixt2, NULL); /* Fix up the '\' character; use a LR flip of the '/' char */ pixt2 = pixaGetPix(pixa, 15, L_CLONE); pixt3 = pixFlipLR(NULL, pixt2); pixDestroy(&pixt2); pixaReplacePix(pixa, 60, pixt3, NULL); #if DEBUG_CHARS { PIX *pixd; pixd = pixaDisplayTiled(pixa, 1500, 0, 10); pixDisplay(pixd, 100 * i, 200); pixDestroy(&pixd); } #endif /* DEBUG_CHARS */ pixDestroy(&pixs); boxaDestroy(&boxar); FREE(tab); return pixa; }
int main(int argc, char* argv[]) { PIX *pixs, *pixb, *pixt; int minb; if (argc < 2) { USAGE: fprintf(stderr, "Usage: %s </path/to/text-image>\n" "\t\t[binarize-threshold] [minw:minh:maxw:maxh]\n", strrchr(argv[0], '/') + 1); return EINVAL; } if (2 < argc) { errno = 0; minb = strtol(argv[2], NULL, 10); if (errno < 0) { fprintf(stderr, "strtol: %s\n", strerror(errno)); goto USAGE; } } else minb = 180; if (!(pixs = pixRead(argv[1]))) ; if (1 && (pixt = pixBackgroundNormMorph(pixs, NULL, 4, 5, 248))) { pixDestroy(&pixs); pixs = pixt; } else if (0 && (pixt = pixBackgroundNorm(pixs, NULL, NULL, 10, 15, 60, 40, 248, 2, 1))) { pixDestroy(&pixs); pixs = pixt; } if (1 && (pixt = pixFindSkewAndDeskew(pixs, 1, NULL, NULL))) { pixDestroy(&pixs); pixs = pixt; } if (0 && pixDisplay(pixs, 0, 0)) ; if (1) { PTA *ptas, *ptad; if (!(pixb = pixConvertTo1(pixs, minb))) ; // pixt = pixDeskewLocal(pixs, 10, 0, 0, 0.0, 0.0, 0.0)) if (!pixGetLocalSkewTransform(pixb, 10, 0, 0, 0.0, 0.0, 0.0, &ptas, &ptad)) { if ((pixt = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE))) { pixDestroy(&pixs); pixs = pixt; } ptaDestroy(&ptas); ptaDestroy(&ptad); } pixDestroy(&pixb); } if (0 && (pixt = pixGammaTRC(NULL, pixs, 1.0, 30, 180))) { pixDestroy(&pixs); pixs = pixt; } if (!(pixb = pixConvertTo1(pixs, minb))) ; if (0) { pixDestroy(&pixs); pixs = pixCopy(pixs, pixb); } // XXX: if (1) { BOX* box; int i, n, j, m; PIX *pixi, *pixl; BOXA *boxi, *boxl; int x, y, w, h, wid; int X = INT_MAX, Y = INT_MAX, W = 0, H; // XXX: do smaller(or no) pixOpenBrick if (pixGetRegionsBinary(pixb, &pixi, &pixl, NULL, 0)) ; boxl = pixConnComp(pixl, NULL, 4); n = boxaGetCount(boxl); for (i = 0; i < n; ++i) { BOXA* boxa; box = boxaGetBox(boxl, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w < 30 || h < 30 || w < h || h < (w / 40)) { boxDestroy(&box); continue; boxaRemoveBox(boxl, i); } if (x < X) X = x; if (y < Y) Y = y; if (W < w) W = w; pixt = pixClipRectangle(pixb, box, NULL); boxDestroy(&box); // XXX: for English if (0) pixt = pixDilateBrick(pixt, pixt, h >> 1, h >> 1); else pixt = pixDilateBrick(pixt, pixt, 16 < h ? h >> 4 : 1, h << 1); if (0 && pixDisplay(pixt, 0, 0)) ; boxa = pixConnComp(pixt, NULL, 8); pixDestroy(&pixt); wid = (h * 3) >> 2; //boxaShift(boxa, x, y); m = boxaGetCount(boxa); for (j = 0; j < m; ++j) { int x0, y0, w0; box = boxaGetBox(boxa, j, L_CLONE); boxGetGeometry(box, &x0, &y0, &w0, NULL); // merge adjacent 2 or 3 small boxes if (1 && w0 < wid && (j + 1) < m) { BOX* boxn; int xn, wn; boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((w0 = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); if (w0 < wid && (j + 1) < m) { boxDestroy(&boxn); boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((wn = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); w0 = wn; } } boxSetGeometry(box, -1, -1, w0, -1); } boxDestroy(&boxn); } boxSetGeometry(box, x + x0, y + y0, -1, -1); boxDestroy(&box); } boxaSparseCompact(boxa); if (1 && (pixt = pixDrawBoxa(pixs, boxa, 1, 0xff000000))) { pixDestroy(&pixs); pixs = pixt; } boxaDestroy(&boxa); } H = y + h;
main(int argc, char **argv) { l_int32 i, n; l_float32 pi, angle, val; BOX *box; BOXA *boxa, *boxa1, *boxa2; NUMA *na1, *na2; PIX *pix, *pix1, *pix2, *pix3, *pixd; PIXA *pixa1, *pixa2, *pixa3, *pixa4; static char mainName[] = "inserttest"; #if 1 pi = 3.1415926535; na1 = numaCreate(500); for (i = 0; i < 500; i++) { angle = 0.02293 * i * pi; val = (l_float32)sin(angle); numaAddNumber(na1, val); } numaWrite("/tmp/junknuma1", na1); na2 = numaCopy(na1); n = numaGetCount(na2); for (i = 0; i < n; i++) { numaGetFValue(na2, i, &val); numaRemoveNumber(na2, i); numaInsertNumber(na2, i, val); } numaWrite("/tmp/junknuma2", na2); numaDestroy(&na1); numaDestroy(&na2); #endif #if 1 pix1 = pixRead("feyn.tif"); box = boxCreate(1138, 1666, 1070, 380); pix2 = pixClipRectangle(pix1, box, NULL); boxDestroy(&box); boxa1 = pixConnComp(pix2, NULL, 8); boxaWrite("/tmp/junkboxa1", boxa1); boxa2 = boxaCopy(boxa1, L_COPY); n = boxaGetCount(boxa2); for (i = 0; i < n; i++) { box = boxaGetBox(boxa2, i, L_COPY); boxaRemoveBox(boxa2, i); boxaInsertBox(boxa2, i, box); } boxaWrite("/tmp/junkboxa2", boxa2); pixDestroy(&pix1); pixDestroy(&pix2); boxaDestroy(&boxa1); boxaDestroy(&boxa2); #endif #if 1 pix1 = pixRead("feyn.tif"); box = boxCreate(1138, 1666, 1070, 380); pix2 = pixClipRectangle(pix1, box, NULL); boxDestroy(&box); boxa = pixConnComp(pix2, &pixa1, 8); boxaDestroy(&boxa); pixaWrite("/tmp/junkpixa1", pixa1); pixa2 = pixaCopy(pixa1, L_COPY); n = pixaGetCount(pixa2); /* Remove and insert each one */ for (i = 0; i < n; i++) { pix = pixaGetPix(pixa2, i, L_COPY); box = pixaGetBox(pixa2, i, L_COPY); pixaRemovePix(pixa2, i); pixaInsertPix(pixa2, i, pix, box); } pixaWrite("/tmp/junkpixa2", pixa2); /* Move the last to the beginning; do it n times */ pixa3 = pixaCopy(pixa2, L_COPY); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa3, n - 1, L_CLONE); box = pixaGetBox(pixa3, n - 1, L_CLONE); pixaInsertPix(pixa3, 0, pix, box); pixaRemovePix(pixa3, n); } pixaWrite("/tmp/junkpixa3", pixa3); /* Move the first one to the end; do it n times */ pixa4 = pixaCopy(pixa3, L_COPY); for (i = 0; i < n; i++) { pix = pixaGetPix(pixa4, 0, L_CLONE); box = pixaGetBox(pixa4, 0, L_CLONE); pixaInsertPix(pixa4, n, pix, box); /* make sure insert works at end */ pixaRemovePix(pixa4, 0); } pixaWrite("/tmp/junkpixa4", pixa4); pixDestroy(&pix1); pixDestroy(&pix2); pixaDestroy(&pixa1); pixaDestroy(&pixa2); pixaDestroy(&pixa3); pixaDestroy(&pixa4); #endif return 0; }