void MakeWordBoxes2(PIX *pixs, l_int32 reduction, L_REGPARAMS *rp) { l_int32 default_minwidth = 10; l_int32 default_minheight = 10; l_int32 default_maxwidth = 400; l_int32 default_maxheight = 70; l_int32 minwidth, minheight, maxwidth, maxheight; BOXA *boxa1, *boxa2; NUMA *na; PIX *pixd1, *pixd2; PIXA *pixa; minwidth = default_minwidth / reduction; minheight = default_minheight / reduction; maxwidth = default_maxwidth / reduction; maxheight = default_maxheight / reduction; /* Get the word boxes */ pixGetWordsInTextlines(pixs, reduction, minwidth, minheight, maxwidth, maxheight, &boxa1, &pixa, &na); pixaDestroy(&pixa); numaDestroy(&na); if (reduction == 1) boxa2 = boxaCopy(boxa1, L_CLONE); else boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0); pixd1 = pixConvertTo8(pixs, 1); pixRenderBoxaArb(pixd1, boxa2, 2, 255, 0, 0); regTestWritePixAndCheck(rp, pixd1, IFF_PNG); pixDisplayWithTitle(pixd1, 800, 100, NULL, rp->display); boxaDestroy(&boxa1); boxaDestroy(&boxa2); /* Do it again with this interface. The result should be the same. */ pixGetWordBoxesInTextlines(pixs, reduction, minwidth, minheight, maxwidth, maxheight, &boxa1, NULL); if (reduction == 1) boxa2 = boxaCopy(boxa1, L_CLONE); else boxa2 = boxaTransform(boxa1, 0, 0, 2.0, 2.0); pixd2 = pixConvertTo8(pixs, 1); pixRenderBoxaArb(pixd2, boxa2, 2, 255, 0, 0); if (regTestComparePix(rp, pixd1, pixd2)) { L_ERROR("pix not the same", "MakeWordBoxes2"); pixDisplayWithTitle(pixd2, 800, 100, NULL, rp->display); } pixDestroy(&pixd1); pixDestroy(&pixd2); boxaDestroy(&boxa1); boxaDestroy(&boxa2); return; }
int main(int argc, char **argv) { char filename[BUF_SIZE]; char *dirin, *rootname, *fname; l_int32 i, j, w, h, firstpage, npages, nfiles, ncomp; l_int32 index, ival, rval, gval, bval; BOX *box; BOXA *boxa; BOXAA *baa; JBDATA *data; JBCLASSER *classer; NUMA *nai; NUMAA *naa; SARRAY *safiles; PIX *pixs, *pixt1, *pixt2, *pixd; PIXCMAP *cmap; static char mainName[] = "wordsinorder"; if (argc != 3 && argc != 5) return ERROR_INT( " Syntax: wordsinorder dirin rootname [firstpage, npages]", mainName, 1); dirin = argv[1]; rootname = argv[2]; if (argc == 3) { firstpage = 0; npages = 0; } else { firstpage = atoi(argv[3]); npages = atoi(argv[4]); } /* Compute the word bounding boxes at 2x reduction, along with * the textlines that they are in. */ safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages); nfiles = sarrayGetCount(safiles); baa = boxaaCreate(nfiles); naa = numaaCreate(nfiles); for (i = 0; i < nfiles; i++) { fname = sarrayGetString(safiles, i, 0); if ((pixs = pixRead(fname)) == NULL) { L_WARNING("image file %d not read\n", mainName, i); continue; } pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT, MAX_WORD_WIDTH, MAX_WORD_HEIGHT, &boxa, &nai); boxaaAddBoxa(baa, boxa, L_INSERT); numaaAddNuma(naa, nai, L_INSERT); #if RENDER_PAGES /* Show the results on a 2x reduced image, where each * word is outlined and the color of the box depends on the * computed textline. */ pixt1 = pixReduceRankBinary2(pixs, 2, NULL); pixGetDimensions(pixt1, &w, &h, NULL); pixd = pixCreate(w, h, 8); cmap = pixcmapCreateRandom(8, 1, 1); /* first color is black */ pixSetColormap(pixd, cmap); pixt2 = pixUnpackBinary(pixt1, 8, 1); pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0); ncomp = boxaGetCount(boxa); for (j = 0; j < ncomp; j++) { box = boxaGetBox(boxa, j, L_CLONE); numaGetIValue(nai, j, &ival); index = 1 + (ival % 254); /* omit black and white */ pixcmapGetColor(cmap, index, &rval, &gval, &bval); pixRenderBoxArb(pixd, box, 2, rval, gval, bval); boxDestroy(&box); } snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); fprintf(stderr, "filename: %s\n", filename); pixWrite(filename, pixd, IFF_PNG); pixDestroy(&pixt1); pixDestroy(&pixt2); pixDestroy(&pixs); pixDestroy(&pixd); #endif /* RENDER_PAGES */ } boxaaDestroy(&baa); numaaDestroy(&naa); sarrayDestroy(&safiles); return 0; }
l_int32 main(int argc, char **argv) { char buf[64]; BOXA *boxa1, *boxa2, *boxa3, *boxa4; L_DEWARP *dew; L_DEWARPA *dewa; PIX *pixs, *pixn, *pixg, *pixb, *pix2, *pix3, *pix4, *pix5, *pix6; lept_mkdir("lept"); snprintf(buf, sizeof(buf), "cat.%03d.jpg", pageno); pixs = pixRead(buf); dewa = dewarpaCreate(40, 30, 1, 15, 10); dewarpaUseBothArrays(dewa, 1); /* Normalize for varying background and binarize */ pixn = pixBackgroundNormSimple(pixs, NULL, NULL); pixg = pixConvertRGBToGray(pixn, 0.5, 0.3, 0.2); pixb = pixThresholdToBinary(pixg, 130); pixDisplay(pixb, 0, 100); /* Build the model */ dew = dewarpCreate(pixb, pageno); dewarpaInsertDewarp(dewa, dew); if (build_output) { snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_build_%d.pdf", pageno); dewarpBuildPageModel(dew, buf); } else { dewarpBuildPageModel(dew, NULL); } /* Apply the model */ dewarpPopulateFullRes(dew, pixg, 0, 0); if (apply_output) { snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_apply_%d.pdf", pageno); dewarpaApplyDisparity(dewa, pageno, pixb, 200, 0, 0, &pix2, buf); } else { dewarpaApplyDisparity(dewa, pageno, pixb, 200, 0, 0, &pix2, NULL); } pixDisplay(pix2, 200, 100); /* Reverse direction: get the word boxes for the dewarped pix ... */ pixGetWordBoxesInTextlines(pix2, 5, 5, 500, 100, &boxa1, NULL); pix3 = pixConvertTo32(pix2); pixRenderBoxaArb(pix3, boxa1, 2, 255, 0, 0); pixDisplay(pix3, 400, 100); /* ... and map to the word boxes for the input image */ if (map_output) { snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_map1_%d.pdf", pageno); dewarpaApplyDisparityBoxa(dewa, pageno, pix2, boxa1, 0, 0, 0, &boxa2, buf); } else { dewarpaApplyDisparityBoxa(dewa, pageno, pix2, boxa1, 0, 0, 0, &boxa2, NULL); } pix4 = pixConvertTo32(pixb); pixRenderBoxaArb(pix4, boxa2, 2, 0, 255, 0); pixDisplay(pix4, 600, 100); /* Forward direction: get the word boxes for the input pix ... */ pixGetWordBoxesInTextlines(pixb, 5, 5, 500, 100, &boxa3, NULL); pix5 = pixConvertTo32(pixb); pixRenderBoxaArb(pix5, boxa3, 2, 255, 0, 0); pixDisplay(pix5, 800, 100); /* ... and map to the word boxes for the dewarped image */ if (map_output) { snprintf(buf, sizeof(buf), "/tmp/lept/dewarp_map2_%d.pdf", pageno); dewarpaApplyDisparityBoxa(dewa, pageno, pixb, boxa3, 1, 0, 0, &boxa4, buf); } else { dewarpaApplyDisparityBoxa(dewa, pageno, pixb, boxa3, 1, 0, 0, &boxa4, NULL); } pix6 = pixConvertTo32(pix2); pixRenderBoxaArb(pix6, boxa4, 2, 0, 255, 0); pixDisplay(pix6, 1000, 100); dewarpaDestroy(&dewa); pixDestroy(&pixs); pixDestroy(&pixn); pixDestroy(&pixg); pixDestroy(&pixb); pixDestroy(&pix2); pixDestroy(&pix3); pixDestroy(&pix4); pixDestroy(&pix5); pixDestroy(&pix6); boxaDestroy(&boxa1); boxaDestroy(&boxa2); boxaDestroy(&boxa3); boxaDestroy(&boxa4); return 0; }
main(int argc, char **argv) { l_int32 i, n, w, h, success, display; FILE *fp; BOXA *boxa; NUMA *naindex, *naw, *nah, *naw_med, *nah_med; PIX *pixs, *pixt, *pixd; if (regTestSetup(argc, argv, &fp, &display, &success, NULL)) return 1; /* Generate arrays of word widths and heights */ pixs = pixRead("feyn.tif"); pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex); n = boxaGetCount(boxa); naw = numaCreate(0); nah = numaCreate(0); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); numaAddNumber(naw, w); numaAddNumber(nah, h); } boxaDestroy(&boxa); numaDestroy(&naindex); /* Make the rank bin arrays of median values, with 10 bins */ numaGetRankBinValues(naw, 10, NULL, &naw_med); numaGetRankBinValues(nah, 10, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_10bin", "width vs rank bins (10)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_10bin", "height vs rank bins (10)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Make the rank bin arrays of median values, with 30 bins */ numaGetRankBinValues(naw, 30, NULL, &naw_med); numaGetRankBinValues(nah, 30, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/w_30bin", "width vs rank bins (30)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/h_30bin", "height vs rank bins (30)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Give gnuplot time to write out the files */ #ifndef _WIN32 sleep(2); #else Sleep(2000); #endif /* _WIN32 */ /* Save as golden files, or check against them */ regTestCheckFile(fp, argv, "/tmp/w_10bin.png", 0, &success); regTestCheckFile(fp, argv, "/tmp/h_10bin.png", 1, &success); regTestCheckFile(fp, argv, "/tmp/w_30bin.png", 2, &success); regTestCheckFile(fp, argv, "/tmp/h_30bin.png", 3, &success); /* Display results for debugging */ pixt = pixRead("/tmp/w_10bin.png"); pixDisplayWithTitle(pixt, 0, 0, NULL, display); pixDestroy(&pixt); pixt = pixRead("/tmp/h_10bin.png"); pixDisplayWithTitle(pixt, 650, 0, NULL, display); pixDestroy(&pixt); pixt = pixRead("/tmp/w_30bin.png"); pixDisplayWithTitle(pixt, 0, 550, NULL, display); pixDestroy(&pixt); pixt = pixRead("/tmp/h_30bin.png"); pixDisplayWithTitle(pixt, 650, 550, NULL, display); pixDestroy(&pixt); pixDestroy(&pixs); numaDestroy(&naw); numaDestroy(&nah); regTestCleanup(argc, argv, fp, success, NULL); return 0; }
int main(int argc, char **argv) { l_int32 i, n, w, h; BOXA *boxa; NUMA *naindex, *naw, *nah, *naw_med, *nah_med; PIX *pixs, *pixt; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; /* Generate arrays of word widths and heights */ pixs = pixRead("feyn.tif"); pixGetWordBoxesInTextlines(pixs, 1, 6, 6, 500, 50, &boxa, &naindex); n = boxaGetCount(boxa); naw = numaCreate(0); nah = numaCreate(0); for (i = 0; i < n; i++) { boxaGetBoxGeometry(boxa, i, NULL, NULL, &w, &h); numaAddNumber(naw, w); numaAddNumber(nah, h); } boxaDestroy(&boxa); numaDestroy(&naindex); /* Make the rank bin arrays of median values, with 10 bins */ lept_rmfile("/tmp/lept/regout/w_10bin.png"); /* remove existing ones */ lept_rmfile("/tmp/lept/regout/h_10bin.png"); lept_rmfile("/tmp/lept/regout/w_30bin.png"); lept_rmfile("/tmp/lept/regout/h_30bin.png"); numaGetRankBinValues(naw, 10, NULL, &naw_med); numaGetRankBinValues(nah, 10, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_10bin", "width vs rank bins (10)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_10bin", "height vs rank bins (10)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Make the rank bin arrays of median values, with 30 bins */ numaGetRankBinValues(naw, 30, NULL, &naw_med); numaGetRankBinValues(nah, 30, NULL, &nah_med); gplotSimple1(naw_med, GPLOT_PNG, "/tmp/lept/regout/w_30bin", "width vs rank bins (30)"); gplotSimple1(nah_med, GPLOT_PNG, "/tmp/lept/regout/h_30bin", "height vs rank bins (30)"); numaDestroy(&naw_med); numaDestroy(&nah_med); /* Save as golden files, or check against them */ regTestCheckFile(rp, "/tmp/lept/regout/w_10bin.png"); /* 0 */ regTestCheckFile(rp, "/tmp/lept/regout/h_10bin.png"); /* 1 */ regTestCheckFile(rp, "/tmp/lept/regout/w_30bin.png"); /* 2 */ regTestCheckFile(rp, "/tmp/lept/regout/h_30bin.png"); /* 3 */ /* Display results for debugging */ pixt = pixRead("/tmp/lept/regout/w_10bin.png"); pixDisplayWithTitle(pixt, 0, 0, NULL, rp->display); pixDestroy(&pixt); pixt = pixRead("/tmp/lept/regout/h_10bin.png"); pixDisplayWithTitle(pixt, 650, 0, NULL, rp->display); pixDestroy(&pixt); pixt = pixRead("/tmp/lept/regout/w_30bin.png"); pixDisplayWithTitle(pixt, 0, 550, NULL, rp->display); pixDestroy(&pixt); pixt = pixRead("/tmp/lept/regout/h_30bin.png"); pixDisplayWithTitle(pixt, 650, 550, NULL, rp->display); pixDestroy(&pixt); pixDestroy(&pixs); numaDestroy(&naw); numaDestroy(&nah); return regTestCleanup(rp); }