std::vector<Figure> extractFigures(PIX *original, PageRegions &pageRegions, DocumentStatistics &docStats, bool verbose, bool showSteps, std::vector<Figure> &errors) { BOXA *bodytext = pageRegions.bodytext; BOXA *graphics = pageRegions.graphics; BOXA *captions = pageRegions.getCaptionsBoxa(); std::vector<Caption> unassigned_captions = pageRegions.captions; int total_captions = captions->n; PIXA *steps = showSteps ? pixaCreate(4) : NULL; // Add bodyText boxes to fill up the margin BOX *margin; BOX *foreground; pixClipToForeground(original, NULL, &foreground); BOX *extent; boxaGetExtent(graphics, NULL, NULL, &extent); margin = boxBoundingRegion(extent, foreground); boxDestroy(&extent); boxaGetExtent(bodytext, NULL, NULL, &extent); margin = boxBoundingRegion(margin, extent); boxDestroy(&extent); boxaGetExtent(pageRegions.other, NULL, NULL, &extent); margin = boxBoundingRegion(margin, extent); int x = margin->x - 2, y = margin->y - 2, h = margin->h + 4, w = margin->w + 4; x = std::max(x, 0); y = std::max(y, 0); h = std::min((int)original->h, h); w = std::min((int)original->w, w); boxDestroy(&margin); boxaAddBox(bodytext, boxCreate(0, 0, original->w, y), L_CLONE); boxaAddBox(bodytext, boxCreate(0, y + h, original->w, original->h - y - h), L_CLONE); boxaAddBox(bodytext, boxCreate(0, 0, x, original->h), L_CLONE); boxaAddBox(bodytext, boxCreate(x + w, 0, original->w - x - w, original->h), L_CLONE); // Add captions to body text boxaJoin(bodytext, captions, 0, captions->n); if (showSteps) pixaAddPix(steps, original, L_CLONE); // Generate proposed regions for each caption box double center = original->w / 2.0; BOXAA *allProposals = boxaaCreate(captions->n); BOXA *claimedImages = boxaCreate(captions->n); for (int i = 0; i < captions->n; i++) { BOX *captBox = boxaGetBox(captions, i, L_CLONE); BOXA *proposals = boxaCreate(4); for (int j = 0; j < bodytext->n; j++) { BOX *txtBox = boxaGetBox(bodytext, j, L_CLONE); BOX *proposal = NULL; int tolerance = 2; int horizontal = 0; int vertical = 0; boxAlignment(captBox, txtBox, tolerance, &horizontal, &vertical); if (vertical * horizontal != 0 or (vertical == 0 and horizontal == 0)) { continue; } if (vertical == 0) { if (horizontal == 1) { proposal = boxRelocateOneSide(NULL, captBox, txtBox->x + txtBox->w + 2, L_FROM_LEFT); } else if (horizontal == -1) { proposal = boxRelocateOneSide(NULL, captBox, txtBox->x - 2, L_FROM_RIGHT); } boxExpandUD(proposal, bodytext); if (horizontal == -1) { proposal->w -= captBox->w + 1; proposal->x = captBox->x + captBox->w + 1; } else if (horizontal == 1) { proposal->w -= captBox->w + 1; } } else { if (vertical == 1) { proposal = boxRelocateOneSide(NULL, captBox, txtBox->y + txtBox->h + 3, L_FROM_TOP); } else if (vertical == -1) { proposal = boxRelocateOneSide(NULL, captBox, txtBox->y - 3, L_FROM_BOT); } boxExpandLR(proposal, bodytext); if (vertical == -1) { proposal->h -= captBox->h + 1; proposal->y = captBox->y + captBox->h + 1; } else if (vertical == 1) { proposal->h -= captBox->h + 1; } } // For two columns document, captions that do not // cross the center should not have regions pass the center if (docStats.documentIsTwoColumn()) { if (captBox->x + captBox->w <= center and proposal->x + proposal->w > center) { boxRelocateOneSide(proposal, proposal, center - 1, L_FROM_RIGHT); } else if (captBox->x >= center and proposal->x < center) { boxRelocateOneSide(proposal, proposal, center + 1, L_FROM_LEFT); } } BOX *clippedProposal; pixClipBoxToForeground(original, proposal, NULL, &clippedProposal); if (clippedProposal != NULL and scoreBox(clippedProposal, pageRegions.captions.at(i).type, bodytext, graphics, claimedImages, original) > 0) { boxaAddBox(proposals, clippedProposal, L_CLONE); } } if (proposals->n > 0) { boxaaAddBoxa(allProposals, proposals, L_CLONE); } else { // Give up on this caption int on_caption = i - (total_captions - unassigned_captions.size()); errors.push_back(Figure(unassigned_captions.at(on_caption), NULL)); unassigned_captions.erase(unassigned_captions.begin() + on_caption); } } std::vector<Figure> figures = std::vector<Figure>(); if (unassigned_captions.size() == 0) { return figures; } // Now go through every possible assignment of captions // to proposals pick the highest scorign one int numConfigurations = 1; for (int i = 0; i < allProposals->n; ++i) { numConfigurations *= allProposals->boxa[i]->n; } if (verbose) printf("Found %d possible configurations\n", numConfigurations); BOXA *bestProposals = NULL; std::vector<bool> bestKeep; int bestFound = -1; double bestScore = -1; for (int onConfig = 0; onConfig < numConfigurations; ++onConfig) { // Gather the proposed regions based on the configuration number int configNum = onConfig; BOXA *proposals = boxaCreate(allProposals->n); std::vector<bool> keep; for (int i = 0; i < allProposals->n; ++i) { int numProposals = allProposals->boxa[i]->n; int selected = configNum % numProposals; configNum = configNum / numProposals; boxaAddBox(proposals, allProposals->boxa[i]->box[selected], L_COPY); } // Attempt to split any overlapping regions for (int i = 0; i < proposals->n; ++i) { for (int j = i; j < proposals->n; ++j) { BOX *p1 = proposals->box[i]; BOX *p2 = proposals->box[j]; int eq; boxEqual(p1, p2, &eq); if (not eq) continue; int vertical, horizontal; boxAlignment(unassigned_captions.at(i).boundingBox, unassigned_captions.at(j).boundingBox, 2, &horizontal, &vertical); if (vertical == 0 or horizontal != 0) continue; double split = splitBoxVertical(original, p1); if (split > 0) { BOX *topClipped; BOX *botClipped; BOX *top = boxRelocateOneSide(NULL, p1, split - 1, L_FROM_BOT); pixClipBoxToForeground(original, top, NULL, &topClipped); BOX *bot = boxRelocateOneSide(NULL, p1, split + 1, L_FROM_TOP); pixClipBoxToForeground(original, bot, NULL, &botClipped); if (vertical == -1) { proposals->box[i] = topClipped; proposals->box[j] = botClipped; } else { proposals->box[i] = botClipped; proposals->box[j] = topClipped; } if (verbose) printf("Split a region vertically\n"); } } } if (showSteps) { pixaAddPix(steps, pixDrawBoxa(original, proposals, 4, 0xff000000), L_CLONE); } // Score the proposals int numFound = 0; double totalScore = 0; for (int i = 0; i < proposals->n; ++i) { double score = scoreBox(proposals->box[i], pageRegions.captions.at(i).type, bodytext, graphics, proposals, original); totalScore += score; if (score > 0) { numFound += 1; keep.push_back(true); } else { keep.push_back(false); } } // Switch in for the current best needed if (numFound > bestFound or (numFound == bestFound and totalScore > bestScore)) { bestFound = numFound; bestScore = totalScore; bestProposals = proposals; bestKeep = keep; } } if (showSteps) { BOX *clip; PIXA *show = pixaCreate(4); pixClipBoxToForeground(original, NULL, NULL, &clip); int pad = 10; clip->x -= 10; clip->y -= 10; clip->w += pad * 2; clip->h += pad * 2; for (int i = 0; i < steps->n; ++i) { pixaAddPix(show, pixClipRectangle(steps->pix[i], clip, NULL), L_CLONE); } pixDisplay(pixaDisplayTiled(pixaConvertTo32(show), 4000, 1, 30), 0, 0); } for (int i = 0; i < bestProposals->n; ++i) { if (bestKeep.at(i)) { BOX *imageBox = bestProposals->box[i]; int pad = 2; imageBox->x -= pad; imageBox->y -= pad; imageBox->w += pad * 2; imageBox->h += pad * 2; figures.push_back(Figure(unassigned_captions.at(i), imageBox)); } else { errors.push_back(Figure(unassigned_captions.at(i), NULL)); } } return figures; }
main(int argc, char **argv) { char *filein, *fileout; l_int32 w, h, d, w2, h2, i, ncols; l_float32 angle, conf; BOX *box; BOXA *boxa, *boxas, *boxad, *boxa2; NUMA *numa; PIX *pixs, *pixt, *pixb, *pixb2, *pixd; PIX *pixtlm, *pixvws; PIX *pixt1, *pixt2, *pixt3, *pixt4, *pixt5, *pixt6; PIXA *pixam, *pixac, *pixad, *pixat; PIXAA *pixaa, *pixaa2; PTA *pta; SEL *selsplit; static char mainName[] = "textlinemask"; if (argc != 3) exit(ERROR_INT(" Syntax: textlinemask filein fileout", mainName, 1)); filein = argv[1]; fileout = argv[2]; pixDisplayWrite(NULL, -1); /* init debug output */ if ((pixs = pixRead(filein)) == NULL) return ERROR_INT("pixs not made", mainName, 1); pixGetDimensions(pixs, &w, &h, &d); /* Binarize input */ if (d == 8) pixt = pixThresholdToBinary(pixs, 128); else if (d == 1) pixt = pixClone(pixs); else { fprintf(stderr, "depth is %d\n", d); exit(1); } /* Deskew */ pixb = pixFindSkewAndDeskew(pixt, 1, &angle, &conf); pixDestroy(&pixt); fprintf(stderr, "Skew angle: %7.2f degrees; %6.2f conf\n", angle, conf); pixDisplayWrite(pixb, DEBUG_OUTPUT); #if 1 /* Use full image morphology to find columns, at 2x reduction. * This only works for very simple layouts where each column * of text extends the full height of the input image. * pixam has a pix component over each column. */ pixb2 = pixReduceRankBinary2(pixb, 2, NULL); pixt1 = pixMorphCompSequence(pixb2, "c5.500", 0); boxa = pixConnComp(pixt1, &pixam, 8); ncols = boxaGetCount(boxa); fprintf(stderr, "Num columns: %d\n", ncols); pixDisplayWrite(pixt1, DEBUG_OUTPUT); /* Use selective region-based morphology to get the textline mask. */ pixad = pixaMorphSequenceByRegion(pixb2, pixam, "c100.3", 0, 0); pixGetDimensions(pixb2, &w2, &h2, NULL); if (DEBUG_OUTPUT) { pixt2 = pixaDisplay(pixad, w2, h2); pixDisplayWrite(pixt2, DEBUG_OUTPUT); pixDestroy(&pixt2); } /* Some of the lines may be touching, so use a HMT to split the * lines in each column, and use a pixaa to save the results. */ selsplit = selCreateFromString(seltext, 17, 7, "selsplit"); pixaa = pixaaCreate(ncols); for (i = 0; i < ncols; i++) { pixt3 = pixaGetPix(pixad, i, L_CLONE); box = pixaGetBox(pixad, i, L_COPY); pixt4 = pixHMT(NULL, pixt3, selsplit); pixXor(pixt4, pixt4, pixt3); boxa2 = pixConnComp(pixt4, &pixac, 8); pixaaAddPixa(pixaa, pixac, L_INSERT); pixaaAddBox(pixaa, box, L_INSERT); if (DEBUG_OUTPUT) { pixt5 = pixaDisplayRandomCmap(pixac, 0, 0); pixDisplayWrite(pixt5, DEBUG_OUTPUT); fprintf(stderr, "Num textlines in col %d: %d\n", i, boxaGetCount(boxa2)); pixDestroy(&pixt5); } pixDestroy(&pixt3); pixDestroy(&pixt4); boxaDestroy(&boxa2); } /* Visual output */ if (DEBUG_OUTPUT) { pixDisplayMultiple("/tmp/junk_write_display*"); pixat = pixaReadFiles("/tmp", "junk_write_display"); pixt5 = selDisplayInPix(selsplit, 31, 2); pixaAddPix(pixat, pixt5, L_INSERT); pixt6 = pixaDisplayTiledAndScaled(pixat, 32, 400, 3, 0, 35, 3); pixWrite(fileout, pixt6, IFF_PNG); pixaDestroy(&pixat); pixDestroy(&pixt6); } /* Test pixaa I/O */ pixaaWrite("/tmp/junkpixaa", pixaa); pixaa2 = pixaaRead("/tmp/junkpixaa"); pixaaWrite("/tmp/junkpixaa2", pixaa2); /* Test pixaa display */ pixd = pixaaDisplay(pixaa, w2, h2); pixWrite("/tmp/junkdisplay", pixd, IFF_PNG); pixDestroy(&pixd); /* Cleanup */ pixDestroy(&pixb2); pixDestroy(&pixt1); pixaDestroy(&pixam); pixaDestroy(&pixad); pixaaDestroy(&pixaa); pixaaDestroy(&pixaa2); boxaDestroy(&boxa); selDestroy(&selsplit); #endif #if 0 /* Use the baseline finder; not really what is needed */ numa = pixFindBaselines(pixb, &pta, 1); #endif #if 0 /* Use the textline mask function; parameters are not quite right */ pixb2 = pixReduceRankBinary2(pixb, 2, NULL); pixtlm = pixGenTextlineMask(pixb2, &pixvws, NULL, 1); pixDisplay(pixtlm, 0, 100); pixDisplay(pixvws, 500, 100); pixDestroy(&pixb2); pixDestroy(&pixtlm); pixDestroy(&pixvws); #endif #if 0 /* Use the Breuel whitespace partition method; slow and we would * still need to work to extract the fg regions. */ pixb2 = pixReduceRankBinary2(pixb, 2, NULL); boxas = pixConnComp(pixb2, NULL, 8); boxad = boxaGetWhiteblocks(boxas, NULL, L_SORT_BY_HEIGHT, 3, 0.1, 200, 0.2, 0); pixd = pixDrawBoxa(pixb2, boxad, 7, 0xe0708000); pixDisplay(pixd, 100, 500); pixDestroy(&pixb2); pixDestroy(&pixd); boxaDestroy(&boxas); boxaDestroy(&boxad); #endif #if 0 /* Use morphology to find columns and then selective * region-based morphology to get the textline mask. * This is for display; we really want to get a pixa of the * specific textline masks. */ startTimer(); pixb2 = pixReduceRankBinary2(pixb, 2, NULL); pixt1 = pixMorphCompSequence(pixb2, "c5.500", 0); /* column mask */ pixt2 = pixMorphSequenceByRegion(pixb2, pixt1, "c100.3", 8, 0, 0, &boxa); fprintf(stderr, "time = %7.3f sec\n", stopTimer()); pixDisplay(pixt1, 100, 500); pixDisplay(pixt2, 800, 500); pixDestroy(&pixb2); pixDestroy(&pixt1); pixDestroy(&pixt2); boxaDestroy(&boxa); #endif pixDestroy(&pixs); pixDestroy(&pixb); exit(0); }
main(int argc, char **argv) { char *filename; l_int32 w, h, type, maxboxes; l_float32 ovlap; BOX *box; BOXA *boxa, *boxat, *boxad; PIX *pix, *pixt, *pixs, *pixd; static char mainName[] = "partitiontest"; if (argc != 3 && argc != 5) return ERROR_INT("syntax: partitiontest <fname> type [maxboxes ovlap]", mainName, 1); filename = argv[1]; type = atoi(argv[2]); if (type == L_SORT_BY_WIDTH) fprintf(stderr, "Sorting by width:\n"); else if (type == L_SORT_BY_HEIGHT) fprintf(stderr, "Sorting by height:\n"); else if (type == L_SORT_BY_MAX_DIMENSION) fprintf(stderr, "Sorting by maximum dimension:\n"); else if (type == L_SORT_BY_MIN_DIMENSION) fprintf(stderr, "Sorting by minimum dimension:\n"); else if (type == L_SORT_BY_PERIMETER) fprintf(stderr, "Sorting by perimeter:\n"); else if (type == L_SORT_BY_AREA) fprintf(stderr, "Sorting by area:\n"); else { fprintf(stderr, "Use one of the following for 'type':\n" " 5: L_SORT_BY_WIDTH\n" " 6: L_SORT_BY_HEIGHT\n" " 7: L_SORT_BY_MIN_DIMENSION\n" " 8: L_SORT_BY_MAX_DIMENSION\n" " 9: L_SORT_BY_PERIMETER\n" " 10: L_SORT_BY_AREA\n"); return ERROR_INT("invalid type: see source", mainName, 1); } if (argc == 5) { maxboxes = atoi(argv[3]); ovlap = atof(argv[4]); } else { maxboxes = 100; ovlap = 0.2; } pix = pixRead(filename); pixs = pixConvertTo1(pix, 128); pixDilateBrick(pixs, pixs, 5, 5); boxa = pixConnComp(pixs, NULL, 4); pixGetDimensions(pixs, &w, &h, NULL); box = boxCreate(0, 0, w, h); startTimer(); boxaPermuteRandom(boxa, boxa); boxat = boxaSelectBySize(boxa, 500, 500, L_SELECT_IF_BOTH, L_SELECT_IF_LT, NULL); boxad = boxaGetWhiteblocks(boxat, box, type, maxboxes, ovlap, 200, 0.15, 20000); fprintf(stderr, "Time: %7.3f sec\n", stopTimer()); boxaWriteStream(stderr, boxad); pixDisplayWrite(NULL, -1); pixDisplayWrite(pixs, REDUCTION); /* Display box outlines in a single color in a cmapped image */ pixd = pixDrawBoxa(pixs, boxad, 7, 0xe0708000); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixd); /* Display box outlines in a single color in an RGB image */ pixt = pixConvertTo8(pixs, FALSE); pixd = pixDrawBoxa(pixt, boxad, 7, 0x40a0c000); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixt); pixDestroy(&pixd); /* Display box outlines with random colors in a cmapped image */ pixd = pixDrawBoxaRandom(pixs, boxad, 7); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixd); /* Display box outlines with random colors in an RGB image */ pixt = pixConvertTo8(pixs, FALSE); pixd = pixDrawBoxaRandom(pixt, boxad, 7); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixt); pixDestroy(&pixd); /* Display boxes in the same color in a cmapped image */ pixd = pixPaintBoxa(pixs, boxad, 0x60e0a000); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixd); /* Display boxes in the same color in an RGB image */ pixt = pixConvertTo8(pixs, FALSE); pixd = pixPaintBoxa(pixt, boxad, 0xc030a000); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixt); pixDestroy(&pixd); /* Display boxes in random colors in a cmapped image */ pixd = pixPaintBoxaRandom(pixs, boxad); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixd); /* Display boxes in random colors in an RGB image */ pixt = pixConvertTo8(pixs, FALSE); pixd = pixPaintBoxaRandom(pixt, boxad); pixDisplayWrite(pixd, REDUCTION); pixDestroy(&pixt); pixDestroy(&pixd); pixDisplayMultiple("/tmp/junk_write_display*"); pixDestroy(&pix); pixDestroy(&pixs); boxDestroy(&box); boxaDestroy(&boxa); boxaDestroy(&boxat); boxaDestroy(&boxad); return 0; }
int main(int argc, char* argv[]) { PIX *pixs, *pixb, *pixt; int minb; if (argc < 2) { USAGE: fprintf(stderr, "Usage: %s </path/to/text-image>\n" "\t\t[binarize-threshold] [minw:minh:maxw:maxh]\n", strrchr(argv[0], '/') + 1); return EINVAL; } if (2 < argc) { errno = 0; minb = strtol(argv[2], NULL, 10); if (errno < 0) { fprintf(stderr, "strtol: %s\n", strerror(errno)); goto USAGE; } } else minb = 180; if (!(pixs = pixRead(argv[1]))) ; if (1 && (pixt = pixBackgroundNormMorph(pixs, NULL, 4, 5, 248))) { pixDestroy(&pixs); pixs = pixt; } else if (0 && (pixt = pixBackgroundNorm(pixs, NULL, NULL, 10, 15, 60, 40, 248, 2, 1))) { pixDestroy(&pixs); pixs = pixt; } if (1 && (pixt = pixFindSkewAndDeskew(pixs, 1, NULL, NULL))) { pixDestroy(&pixs); pixs = pixt; } if (0 && pixDisplay(pixs, 0, 0)) ; if (1) { PTA *ptas, *ptad; if (!(pixb = pixConvertTo1(pixs, minb))) ; // pixt = pixDeskewLocal(pixs, 10, 0, 0, 0.0, 0.0, 0.0)) if (!pixGetLocalSkewTransform(pixb, 10, 0, 0, 0.0, 0.0, 0.0, &ptas, &ptad)) { if ((pixt = pixProjectiveSampledPta(pixs, ptad, ptas, L_BRING_IN_WHITE))) { pixDestroy(&pixs); pixs = pixt; } ptaDestroy(&ptas); ptaDestroy(&ptad); } pixDestroy(&pixb); } if (0 && (pixt = pixGammaTRC(NULL, pixs, 1.0, 30, 180))) { pixDestroy(&pixs); pixs = pixt; } if (!(pixb = pixConvertTo1(pixs, minb))) ; if (0) { pixDestroy(&pixs); pixs = pixCopy(pixs, pixb); } // XXX: if (1) { BOX* box; int i, n, j, m; PIX *pixi, *pixl; BOXA *boxi, *boxl; int x, y, w, h, wid; int X = INT_MAX, Y = INT_MAX, W = 0, H; // XXX: do smaller(or no) pixOpenBrick if (pixGetRegionsBinary(pixb, &pixi, &pixl, NULL, 0)) ; boxl = pixConnComp(pixl, NULL, 4); n = boxaGetCount(boxl); for (i = 0; i < n; ++i) { BOXA* boxa; box = boxaGetBox(boxl, i, L_CLONE); boxGetGeometry(box, &x, &y, &w, &h); if (w < 30 || h < 30 || w < h || h < (w / 40)) { boxDestroy(&box); continue; boxaRemoveBox(boxl, i); } if (x < X) X = x; if (y < Y) Y = y; if (W < w) W = w; pixt = pixClipRectangle(pixb, box, NULL); boxDestroy(&box); // XXX: for English if (0) pixt = pixDilateBrick(pixt, pixt, h >> 1, h >> 1); else pixt = pixDilateBrick(pixt, pixt, 16 < h ? h >> 4 : 1, h << 1); if (0 && pixDisplay(pixt, 0, 0)) ; boxa = pixConnComp(pixt, NULL, 8); pixDestroy(&pixt); wid = (h * 3) >> 2; //boxaShift(boxa, x, y); m = boxaGetCount(boxa); for (j = 0; j < m; ++j) { int x0, y0, w0; box = boxaGetBox(boxa, j, L_CLONE); boxGetGeometry(box, &x0, &y0, &w0, NULL); // merge adjacent 2 or 3 small boxes if (1 && w0 < wid && (j + 1) < m) { BOX* boxn; int xn, wn; boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((w0 = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); if (w0 < wid && (j + 1) < m) { boxDestroy(&boxn); boxn = boxaGetBox(boxa, j + 1, L_CLONE); boxGetGeometry(boxn, &xn, NULL, &wn, NULL); if ((wn = xn + wn - x0) < h) { boxaSparseClearBox(boxa, ++j); w0 = wn; } } boxSetGeometry(box, -1, -1, w0, -1); } boxDestroy(&boxn); } boxSetGeometry(box, x + x0, y + y0, -1, -1); boxDestroy(&box); } boxaSparseCompact(boxa); if (1 && (pixt = pixDrawBoxa(pixs, boxa, 1, 0xff000000))) { pixDestroy(&pixs); pixs = pixt; } boxaDestroy(&boxa); } H = y + h;