Exemplo n.º 1
0
/*!
 * \brief   boxaaQuadtreeRegions()
 *
 * \param[in]    w, h     size of pix that is being quadtree-ized
 * \param[in]    nlevels  number of levels in quadtree
 * \return  baa for quadtree regions at each level, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The returned boxaa has %nlevels of boxa, each containing
 *          the set of rectangles at that level.  The rectangle at
 *          level 0 is the entire region; at level 1 the region is
 *          divided into 4 rectangles, and at level n there are n^4
 *          rectangles.
 *      (2) At each level, the rectangles in the boxa are in "raster"
 *          order, with LR (fast scan) and TB (slow scan).
 * </pre>
 */
BOXAA *
boxaaQuadtreeRegions(l_int32  w,
                     l_int32  h,
                     l_int32  nlevels)
{
l_int32   i, j, k, maxpts, nside, nbox, bw, bh;
l_int32  *xstart, *xend, *ystart, *yend;
BOX      *box;
BOXA     *boxa;
BOXAA    *baa;

    PROCNAME("boxaaQuadtreeRegions");

    if (nlevels < 1)
        return (BOXAA *)ERROR_PTR("nlevels must be >= 1", procName, NULL);
    if (w < (1 << (nlevels - 1)))
        return (BOXAA *)ERROR_PTR("w doesn't support nlevels", procName, NULL);
    if (h < (1 << (nlevels - 1)))
        return (BOXAA *)ERROR_PTR("h doesn't support nlevels", procName, NULL);

    baa = boxaaCreate(nlevels);
    maxpts = 1 << (nlevels - 1);
    xstart = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32));
    xend = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32));
    ystart = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32));
    yend = (l_int32 *)LEPT_CALLOC(maxpts, sizeof(l_int32));
    for (k = 0; k < nlevels; k++) {
        nside = 1 << k;  /* number of boxes in each direction */
        for (i = 0; i < nside; i++) {
            xstart[i] = (w - 1) * i / nside;
            if (i > 0) xstart[i]++;
            xend[i] = (w - 1) * (i + 1) / nside;
            ystart[i] = (h - 1) * i / nside;
            if (i > 0) ystart[i]++;
            yend[i] = (h - 1) * (i + 1) / nside;
#if DEBUG_BOXES
            fprintf(stderr,
               "k = %d, xs[%d] = %d, xe[%d] = %d, ys[%d] = %d, ye[%d] = %d\n",
                    k, i, xstart[i], i, xend[i], i, ystart[i], i, yend[i]);
#endif  /* DEBUG_BOXES */
        }
        nbox = 1 << (2 * k);
        boxa = boxaCreate(nbox);
        for (i = 0; i < nside; i++) {
            bh = yend[i] - ystart[i] + 1;
            for (j = 0; j < nside; j++) {
                bw = xend[j] - xstart[j] + 1;
                box = boxCreate(xstart[j], ystart[i], bw, bh);
                boxaAddBox(boxa, box, L_INSERT);
            }
        }
        boxaaAddBoxa(baa, boxa, L_INSERT);
    }

    LEPT_FREE(xstart);
    LEPT_FREE(xend);
    LEPT_FREE(ystart);
    LEPT_FREE(yend);
    return baa;
}
Exemplo n.º 2
0
/*!
 *  boxaEncapsulateAligned()
 *
 *      Input:  boxa
 *              num (number put into each boxa in the baa)
 *              copyflag  (L_COPY or L_CLONE)
 *      Return: boxaa, or null on error
 *
 *  Notes:
 *      (1) This puts @num boxes from the input @boxa into each of a
 *          set of boxa within an output boxaa.
 *      (2) This assumes that the boxes in @boxa are in sets of @num each.
 */
BOXAA *
boxaEncapsulateAligned(BOXA    *boxa,
                       l_int32  num,
                       l_int32  copyflag)
{
l_int32  i, j, n, nbaa, index;
BOX     *box;
BOXA    *boxat;
BOXAA   *baa;

    PROCNAME("boxaEncapsulateAligned");

    if (!boxa)
        return (BOXAA *)ERROR_PTR("boxa not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaGetCount(boxa);
    nbaa = (n + num - 1) / num;
    if (n / num != nbaa)
        L_ERROR("inconsistent alignment: n / num not an integer", procName);
    baa = boxaaCreate(nbaa);
    for (i = 0, index = 0; i < nbaa; i++) {
        boxat = boxaCreate(num);
        for (j = 0; j < num; j++, index++) {
            box = boxaGetBox(boxa, index, copyflag);
            boxaAddBox(boxat, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxat, L_INSERT);
    }

    return baa;
}
/*!
 *  boxaaReadStream()
 *
 *      Input:  stream
 *      Return: boxaa, or null on error
 */
BOXAA *
boxaaReadStream(FILE  *fp)
{
l_int32  n, i, x, y, w, h, version;
l_int32  ignore;
BOXA    *boxa;
BOXAA   *baa;

    PROCNAME("boxaaReadStream");

    if (!fp)
        return (BOXAA *)ERROR_PTR("stream not defined", procName, NULL);

    if (fscanf(fp, "\nBoxaa Version %d\n", &version) != 1)
        return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL);
    if (version != BOXAA_VERSION_NUMBER)
        return (BOXAA *)ERROR_PTR("invalid boxa version", procName, NULL);
    if (fscanf(fp, "Number of boxa = %d\n", &n) != 1)
        return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL);

    if ((baa = boxaaCreate(n)) == NULL)
        return (BOXAA *)ERROR_PTR("boxaa not made", procName, NULL);

    for (i = 0; i < n; i++) {
        if (fscanf(fp, "\nBoxa[%d] extent: x = %d, y = %d, w = %d, h = %d",
                   &ignore, &x, &y, &w, &h) != 5)
            return (BOXAA *)ERROR_PTR("boxa descr not valid", procName, NULL);
        if ((boxa = boxaReadStream(fp)) == NULL)
            return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);
        boxaaAddBoxa(baa, boxa, L_INSERT);
    }

    return baa;
}
Exemplo n.º 4
0
/*!
 *  boxaSort2dByIndex()
 *
 *      Input:  boxas
 *              naa (numaa that maps from the new baa to the input boxa)
 *      Return: baa (sorted boxaa), or null on error
 */
BOXAA *
boxaSort2dByIndex(BOXA   *boxas,
                  NUMAA  *naa)
{
l_int32  ntot, boxtot, i, j, n, nn, index;
BOX     *box;
BOXA    *boxa;
BOXAA   *baa;
NUMA    *na;

    PROCNAME("boxaSort2dByIndex");

    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);
    if (!naa)
        return (BOXAA *)ERROR_PTR("naindex not defined", procName, NULL);

        /* Check counts */
    ntot = numaaGetNumberCount(naa);
    boxtot = boxaGetCount(boxas);
    if (ntot != boxtot)
        return (BOXAA *)ERROR_PTR("element count mismatch", procName, NULL);

    n = numaaGetCount(naa);
    baa = boxaaCreate(n);
    for (i = 0; i < n; i++) {
        na = numaaGetNuma(naa, i, L_CLONE);
        nn = numaGetCount(na);
        boxa = boxaCreate(nn);
        for (j = 0; j < nn; j++) {
            numaGetIValue(na, i, &index);
            box = boxaGetBox(boxas, index, L_COPY);
            boxaAddBox(boxa, box, L_INSERT);
        }
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaDestroy(&na);
    }

    return baa;
}
Exemplo n.º 5
0
/*!
 * \brief   boxaaSelectRange()
 *
 * \param[in]    baas
 * \param[in]    first      use 0 to select from the beginning
 * \param[in]    last       use -1 to select to the end
 * \param[in]    copyflag   L_COPY, L_CLONE
 * \return  baad, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The copyflag specifies what we do with each boxa from baas.
 *          Specifically, L_CLONE inserts a clone into baad of each
 *          selected boxa from baas.
 * </pre>
 */
BOXAA *
boxaaSelectRange(BOXAA   *baas,
                 l_int32  first,
                 l_int32  last,
                 l_int32  copyflag)
{
l_int32  n, nboxa, i;
BOXA    *boxa;
BOXAA   *baad;

    PROCNAME("boxaaSelectRange");

    if (!baas)
        return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);
    if ((n = boxaaGetCount(baas)) == 0)
        return (BOXAA *)ERROR_PTR("empty baas", procName, NULL);
    first = L_MAX(0, first);
    if (last < 0) last = n - 1;
    if (first >= n)
        return (BOXAA *)ERROR_PTR("invalid first", procName, NULL);
    if (last >= n) {
        L_WARNING("last = %d is beyond max index = %d; adjusting\n",
                  procName, last, n - 1);
        last = n - 1;
    }
    if (first > last)
        return (BOXAA *)ERROR_PTR("first > last", procName, NULL);

    nboxa = last - first + 1;
    baad = boxaaCreate(nboxa);
    for (i = first; i <= last; i++) {
        boxa = boxaaGetBoxa(baas, i, copyflag);
        boxaaAddBoxa(baad, boxa, L_INSERT);
    }
    return baad;
}
/*!
 *  boxaaCopy()
 *
 *      Input:  baas (input boxaa to be copied)
 *              copyflag (L_COPY, L_CLONE)
 *      Return: baad (new boxaa, composed of copies or clones of the boxa
 *                    in baas), or null on error
 *
 *  Notes:
 *      (1) L_COPY makes a copy of each boxa in baas.
 *          L_CLONE makes a clone of each boxa in baas.
 */
BOXAA *
boxaaCopy(BOXAA   *baas,
          l_int32  copyflag)
{
l_int32  i, n;
BOXA    *boxa;
BOXAA   *baad;

    PROCNAME("boxaaCopy");

    if (!baas)
        return (BOXAA *)ERROR_PTR("baas not defined", procName, NULL);
    if (copyflag != L_COPY && copyflag != L_CLONE)
        return (BOXAA *)ERROR_PTR("invalid copyflag", procName, NULL);

    n = boxaaGetCount(baas);
    baad = boxaaCreate(n);
    for (i = 0; i < n; i++) {
        boxa = boxaaGetBoxa(baas, i, copyflag);
        boxaaAddBoxa(baad, boxa, L_INSERT);
    }

    return baad;
}
/*!
 *  boxaaReadStreamVersion2()
 *
 *      Input:  stream
 *      Return: boxaa, or null on error
 *
 */
BOXAA *
boxaaReadStreamVersion2(FILE  *fp)
{
l_int32  n, i, x, y, w, h, version;
l_int32  ignore;
BOXA    *boxa;
BOXAA   *baa;

    PROCNAME("boxaaReadStreamVersion2");

    if (!fp)
        return (BOXAA *)ERROR_PTR("stream not defined", procName, NULL);

    if (fscanf(fp, "\nBoxaa Version %d\n", &version) != 1)
        return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL);
    if (version != 2) {
        fprintf(stderr, "This is version %d\n", version);
        return (BOXAA *)ERROR_PTR("Not old version 2", procName, NULL);
    }
    if (fscanf(fp, "Number of boxa = %d\n", &n) != 1)
        return (BOXAA *)ERROR_PTR("not a boxaa file", procName, NULL);

    if ((baa = boxaaCreate(n)) == NULL)
        return (BOXAA *)ERROR_PTR("boxaa not made", procName, NULL);

    for (i = 0; i < n; i++) {
        if (fscanf(fp, " Boxa[%d]: x = %d, y = %d, w = %d, h = %d\n",
                &ignore, &x, &y, &w, &h) != 5)
            return (BOXAA *)ERROR_PTR("boxa descr not valid", procName, NULL);
        if ((boxa = boxaReadStream(fp)) == NULL)
            return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);
        boxaaAddBoxa(baa, boxa, L_INSERT);
    }

    return baa;
}
Exemplo n.º 8
0
std::vector<Figure> extractFigures(PIX *original, PageRegions &pageRegions,
                                   DocumentStatistics &docStats, bool verbose,
                                   bool showSteps,
                                   std::vector<Figure> &errors) {
  BOXA *bodytext = pageRegions.bodytext;
  BOXA *graphics = pageRegions.graphics;
  BOXA *captions = pageRegions.getCaptionsBoxa();
  std::vector<Caption> unassigned_captions = pageRegions.captions;
  int total_captions = captions->n;

  PIXA *steps = showSteps ? pixaCreate(4) : NULL;

  // Add bodyText boxes to fill up the margin
  BOX *margin;
  BOX *foreground;
  pixClipToForeground(original, NULL, &foreground);
  BOX *extent;
  boxaGetExtent(graphics, NULL, NULL, &extent);
  margin = boxBoundingRegion(extent, foreground);
  boxDestroy(&extent);
  boxaGetExtent(bodytext, NULL, NULL, &extent);
  margin = boxBoundingRegion(margin, extent);
  boxDestroy(&extent);
  boxaGetExtent(pageRegions.other, NULL, NULL, &extent);
  margin = boxBoundingRegion(margin, extent);
  int x = margin->x - 2, y = margin->y - 2, h = margin->h + 4,
      w = margin->w + 4;
  x = std::max(x, 0);
  y = std::max(y, 0);
  h = std::min((int)original->h, h);
  w = std::min((int)original->w, w);
  boxDestroy(&margin);
  boxaAddBox(bodytext, boxCreate(0, 0, original->w, y), L_CLONE);
  boxaAddBox(bodytext, boxCreate(0, y + h, original->w, original->h - y - h),
             L_CLONE);
  boxaAddBox(bodytext, boxCreate(0, 0, x, original->h), L_CLONE);
  boxaAddBox(bodytext, boxCreate(x + w, 0, original->w - x - w, original->h),
             L_CLONE);

  // Add captions to body text
  boxaJoin(bodytext, captions, 0, captions->n);

  if (showSteps)
    pixaAddPix(steps, original, L_CLONE);

  // Generate proposed regions for each caption box
  double center = original->w / 2.0;
  BOXAA *allProposals = boxaaCreate(captions->n);
  BOXA *claimedImages = boxaCreate(captions->n);
  for (int i = 0; i < captions->n; i++) {
    BOX *captBox = boxaGetBox(captions, i, L_CLONE);
    BOXA *proposals = boxaCreate(4);
    for (int j = 0; j < bodytext->n; j++) {
      BOX *txtBox = boxaGetBox(bodytext, j, L_CLONE);
      BOX *proposal = NULL;
      int tolerance = 2;
      int horizontal = 0;
      int vertical = 0;

      boxAlignment(captBox, txtBox, tolerance, &horizontal, &vertical);
      if (vertical * horizontal != 0 or (vertical == 0 and horizontal == 0)) {
        continue;
      }

      if (vertical == 0) {
        if (horizontal == 1) {
          proposal = boxRelocateOneSide(NULL, captBox,
                                        txtBox->x + txtBox->w + 2, L_FROM_LEFT);
        } else if (horizontal == -1) {
          proposal =
              boxRelocateOneSide(NULL, captBox, txtBox->x - 2, L_FROM_RIGHT);
        }
        boxExpandUD(proposal, bodytext);
        if (horizontal == -1) {
          proposal->w -= captBox->w + 1;
          proposal->x = captBox->x + captBox->w + 1;
        } else if (horizontal == 1) {
          proposal->w -= captBox->w + 1;
        }
      } else {
        if (vertical == 1) {
          proposal = boxRelocateOneSide(NULL, captBox,
                                        txtBox->y + txtBox->h + 3, L_FROM_TOP);
        } else if (vertical == -1) {
          proposal =
              boxRelocateOneSide(NULL, captBox, txtBox->y - 3, L_FROM_BOT);
        }
        boxExpandLR(proposal, bodytext);
        if (vertical == -1) {
          proposal->h -= captBox->h + 1;
          proposal->y = captBox->y + captBox->h + 1;
        } else if (vertical == 1) {
          proposal->h -= captBox->h + 1;
        }
      }

      // For two columns document, captions that do not
      // cross the center should not have regions pass the center
      if (docStats.documentIsTwoColumn()) {
        if (captBox->x + captBox->w <= center and
            proposal->x + proposal->w > center) {
          boxRelocateOneSide(proposal, proposal, center - 1, L_FROM_RIGHT);
        } else if (captBox->x >= center and proposal->x < center) {
          boxRelocateOneSide(proposal, proposal, center + 1, L_FROM_LEFT);
        }
      }

      BOX *clippedProposal;
      pixClipBoxToForeground(original, proposal, NULL, &clippedProposal);
      if (clippedProposal != NULL and
          scoreBox(clippedProposal, pageRegions.captions.at(i).type, bodytext,
                   graphics, claimedImages, original) > 0) {
        boxaAddBox(proposals, clippedProposal, L_CLONE);
      }
    }

    if (proposals->n > 0) {
      boxaaAddBoxa(allProposals, proposals, L_CLONE);
    } else {
      // Give up on this caption
      int on_caption = i - (total_captions - unassigned_captions.size());
      errors.push_back(Figure(unassigned_captions.at(on_caption), NULL));
      unassigned_captions.erase(unassigned_captions.begin() + on_caption);
    }
  }
  std::vector<Figure> figures = std::vector<Figure>();
  if (unassigned_captions.size() == 0) {
    return figures;
  }

  // Now go through every possible assignment of captions
  // to proposals pick the highest scorign one
  int numConfigurations = 1;
  for (int i = 0; i < allProposals->n; ++i) {
    numConfigurations *= allProposals->boxa[i]->n;
  }

  if (verbose)
    printf("Found %d possible configurations\n", numConfigurations);

  BOXA *bestProposals = NULL;
  std::vector<bool> bestKeep;
  int bestFound = -1;
  double bestScore = -1;
  for (int onConfig = 0; onConfig < numConfigurations; ++onConfig) {

    // Gather the proposed regions based on the configuration number
    int configNum = onConfig;
    BOXA *proposals = boxaCreate(allProposals->n);
    std::vector<bool> keep;
    for (int i = 0; i < allProposals->n; ++i) {
      int numProposals = allProposals->boxa[i]->n;
      int selected = configNum % numProposals;
      configNum = configNum / numProposals;
      boxaAddBox(proposals, allProposals->boxa[i]->box[selected], L_COPY);
    }

    // Attempt to split any overlapping regions
    for (int i = 0; i < proposals->n; ++i) {
      for (int j = i; j < proposals->n; ++j) {
        BOX *p1 = proposals->box[i];
        BOX *p2 = proposals->box[j];
        int eq;
        boxEqual(p1, p2, &eq);
        if (not eq)
          continue;
        int vertical, horizontal;
        boxAlignment(unassigned_captions.at(i).boundingBox,
                     unassigned_captions.at(j).boundingBox, 2, &horizontal,
                     &vertical);
        if (vertical == 0 or horizontal != 0)
          continue;

        double split = splitBoxVertical(original, p1);
        if (split > 0) {
          BOX *topClipped;
          BOX *botClipped;
          BOX *top = boxRelocateOneSide(NULL, p1, split - 1, L_FROM_BOT);
          pixClipBoxToForeground(original, top, NULL, &topClipped);
          BOX *bot = boxRelocateOneSide(NULL, p1, split + 1, L_FROM_TOP);
          pixClipBoxToForeground(original, bot, NULL, &botClipped);
          if (vertical == -1) {
            proposals->box[i] = topClipped;
            proposals->box[j] = botClipped;
          } else {
            proposals->box[i] = botClipped;
            proposals->box[j] = topClipped;
          }
          if (verbose)
            printf("Split a region vertically\n");
        }
      }
    }

    if (showSteps) {
      pixaAddPix(steps, pixDrawBoxa(original, proposals, 4, 0xff000000),
                 L_CLONE);
    }

    // Score the proposals
    int numFound = 0;
    double totalScore = 0;
    for (int i = 0; i < proposals->n; ++i) {
      double score =
          scoreBox(proposals->box[i], pageRegions.captions.at(i).type, bodytext,
                   graphics, proposals, original);
      totalScore += score;
      if (score > 0) {
        numFound += 1;
        keep.push_back(true);
      } else {
        keep.push_back(false);
      }
    }

    // Switch in for the current best needed
    if (numFound > bestFound or
        (numFound == bestFound and totalScore > bestScore)) {
      bestFound = numFound;
      bestScore = totalScore;
      bestProposals = proposals;
      bestKeep = keep;
    }
  }

  if (showSteps) {
    BOX *clip;
    PIXA *show = pixaCreate(4);
    pixClipBoxToForeground(original, NULL, NULL, &clip);
    int pad = 10;
    clip->x -= 10;
    clip->y -= 10;
    clip->w += pad * 2;
    clip->h += pad * 2;
    for (int i = 0; i < steps->n; ++i) {
      pixaAddPix(show, pixClipRectangle(steps->pix[i], clip, NULL), L_CLONE);
    }
    pixDisplay(pixaDisplayTiled(pixaConvertTo32(show), 4000, 1, 30), 0, 0);
  }

  for (int i = 0; i < bestProposals->n; ++i) {
    if (bestKeep.at(i)) {
      BOX *imageBox = bestProposals->box[i];
      int pad = 2;
      imageBox->x -= pad;
      imageBox->y -= pad;
      imageBox->w += pad * 2;
      imageBox->h += pad * 2;
      figures.push_back(Figure(unassigned_captions.at(i), imageBox));
    } else {
      errors.push_back(Figure(unassigned_captions.at(i), NULL));
    }
  }
  return figures;
}
Exemplo n.º 9
0
int main(int    argc,
         char **argv)
{
char         filename[BUF_SIZE];
char        *dirin, *rootname, *fname;
l_int32      i, j, w, h, firstpage, npages, nfiles, ncomp;
l_int32      index, ival, rval, gval, bval;
BOX         *box;
BOXA        *boxa;
BOXAA       *baa;
JBDATA      *data;
JBCLASSER   *classer;
NUMA        *nai;
NUMAA       *naa;
SARRAY      *safiles;
PIX         *pixs, *pixt1, *pixt2, *pixd;
PIXCMAP     *cmap;
static char  mainName[] = "wordsinorder";

    if (argc != 3 && argc != 5)
        return ERROR_INT(
            " Syntax: wordsinorder dirin rootname [firstpage, npages]",
            mainName, 1);

    dirin = argv[1];
    rootname = argv[2];

    if (argc == 3) {
        firstpage = 0;
        npages = 0;
    }
    else {
        firstpage = atoi(argv[3]);
        npages = atoi(argv[4]);
    }

        /* Compute the word bounding boxes at 2x reduction, along with
         * the textlines that they are in. */
    safiles = getSortedPathnamesInDirectory(dirin, NULL, firstpage, npages);
    nfiles = sarrayGetCount(safiles);
    baa = boxaaCreate(nfiles);
    naa = numaaCreate(nfiles);
    for (i = 0; i < nfiles; i++) {
        fname = sarrayGetString(safiles, i, 0);
        if ((pixs = pixRead(fname)) == NULL) {
            L_WARNING("image file %d not read\n", mainName, i);
            continue;
        }
        pixGetWordBoxesInTextlines(pixs, 2, MIN_WORD_WIDTH, MIN_WORD_HEIGHT,
                                   MAX_WORD_WIDTH, MAX_WORD_HEIGHT,
                                   &boxa, &nai);
        boxaaAddBoxa(baa, boxa, L_INSERT);
        numaaAddNuma(naa, nai, L_INSERT);

#if  RENDER_PAGES
            /* Show the results on a 2x reduced image, where each
             * word is outlined and the color of the box depends on the
             * computed textline. */
        pixt1 = pixReduceRankBinary2(pixs, 2, NULL);
        pixGetDimensions(pixt1, &w, &h, NULL);
        pixd = pixCreate(w, h, 8);
        cmap = pixcmapCreateRandom(8, 1, 1);  /* first color is black */
        pixSetColormap(pixd, cmap);

        pixt2 = pixUnpackBinary(pixt1, 8, 1);
        pixRasterop(pixd, 0, 0, w, h, PIX_SRC | PIX_DST, pixt2, 0, 0);
        ncomp = boxaGetCount(boxa);
        for (j = 0; j < ncomp; j++) {
            box = boxaGetBox(boxa, j, L_CLONE);
            numaGetIValue(nai, j, &ival);
            index = 1 + (ival % 254);  /* omit black and white */
            pixcmapGetColor(cmap, index, &rval, &gval, &bval);
            pixRenderBoxArb(pixd, box, 2, rval, gval, bval);
            boxDestroy(&box);
        }

        snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i);
        fprintf(stderr, "filename: %s\n", filename);
        pixWrite(filename, pixd, IFF_PNG);
        pixDestroy(&pixt1);
        pixDestroy(&pixt2);
        pixDestroy(&pixs);
        pixDestroy(&pixd);
#endif  /* RENDER_PAGES */
    }

    boxaaDestroy(&baa);
    numaaDestroy(&naa);
    sarrayDestroy(&safiles);
    return 0;
}
Exemplo n.º 10
0
main(int    argc,
     char **argv)
{
l_int32      h;
l_float32    scalefactor;
BOX         *box;
BOXA        *boxa1, *boxa2;
BOXAA       *baa;
PIX         *pix1, *pix2, *pix3, *pix4, *pix5, *pix6, *pix7, *pix8, *pix9;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    lept_rmdir("segtest");
    lept_mkdir("segtest");
    baa = boxaaCreate(5);

        /* Image region input.  */
    pix1 = pixRead("wet-day.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/0.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/0.jpg");   /* 0 */
    box = boxCreate(105, 161, 620, 872);   /* image region */
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Compute image region at w = 2 * WIDTH */
    pix1 = pixRead("candelabrum-11.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pix3 = pixConvertTo1(pix2, 100);
    pix4 = pixExpandBinaryPower2(pix3, 2);  /* w = 2 * WIDTH */
    pix5 = pixGenHalftoneMask(pix4, NULL, NULL, 1);
    pix6 = pixMorphSequence(pix5, "c20.1 + c1.20", 0);
    pix7 = pixMaskConnComp(pix6, 8, &boxa1);
    pix8 = pixReduceBinary2(pix7, NULL);  /* back to w = WIDTH */
    pix9 = pixBackgroundNormSimple(pix2, pix8, NULL);
    pixWrite("/tmp/segtest/1.jpg", pix9, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/1.jpg");   /* 1 */
    boxa2 = boxaTransform(boxa1, 0, 0, 0.5, 0.5);  /* back to w = WIDTH */
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    pixDestroy(&pix5);
    pixDestroy(&pix6);
    pixDestroy(&pix7);
    pixDestroy(&pix8);
    pixDestroy(&pix9);
    boxaDestroy(&boxa1);

        /* Use mask to find image region */
    pix1 = pixRead("lion-page.00016.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/2.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/2.jpg");   /* 2 */
    pix3 = pixRead("lion-mask.00016.tif");
    pix4 = pixScaleToSize(pix3, WIDTH, 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);

        /* Compute image region at full res */
    pix1 = pixRead("rabi.png");
    scalefactor = (l_float32)WIDTH / (l_float32)pixGetWidth(pix1);
    pix2 = pixScaleToGray(pix1, scalefactor);
    pixWrite("/tmp/segtest/3.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/3.jpg");   /* 3 */
    pix3 = pixGenHalftoneMask(pix1, NULL, NULL, 0);
    pix4 = pixMorphSequence(pix3, "c20.1 + c1.20", 0);
    boxa1 = pixConnComp(pix4, NULL, 8);
    boxa2 = boxaTransform(boxa1, 0, 0, scalefactor, scalefactor);
    boxaaAddBoxa(baa, boxa2, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);
    pixDestroy(&pix3);
    pixDestroy(&pix4);
    boxaDestroy(&boxa1);

        /* Page with no image regions */
    pix1 = pixRead("lucasta-47.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    boxa1 = boxaCreate(1);
    pixWrite("/tmp/segtest/4.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/4.jpg");   /* 4 */
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Page that is all image */
    pix1 = pixRead("map1.jpg");
    pix2 = pixScaleToSize(pix1, WIDTH, 0);
    pixWrite("/tmp/segtest/5.jpg", pix2, IFF_JFIF_JPEG);
    regTestCheckFile(rp, "/tmp/segtest/5.jpg");   /* 5 */
    h = pixGetHeight(pix2);
    box = boxCreate(0, 0, WIDTH, h);
    boxa1 = boxaCreate(1);
    boxaAddBox(boxa1, box, L_INSERT);
    boxaaAddBoxa(baa, boxa1, L_INSERT);
    pixDestroy(&pix1);
    pixDestroy(&pix2);

        /* Save the boxaa file */
    boxaaWrite("/tmp/segtest/seg.baa", baa);
    regTestCheckFile(rp, "/tmp/segtest/seg.baa");   /* 6 */

        /* Do the conversion */
    l_pdfSetDateAndVersion(FALSE);
    convertSegmentedFilesToPdf("/tmp/segtest", ".jpg", 100, L_G4_ENCODE,
                               140, baa, 75, 0.6, "Segmentation Test",
                               "/tmp/pdfseg.7.pdf");
    regTestCheckFile(rp, "/tmp/pdfseg.7.pdf");   /* 7 */

    boxaaDestroy(&baa);
    return regTestCleanup(rp);
}
Exemplo n.º 11
0
// Creates new set of lines from the computed columns
bool CubeLineSegmenter::AddLines(Pixa *lines) {
  // create an array that will hold the bounding boxes
  // of the concomps belonging to each line
  Boxaa *lines_con_comps = boxaaCreate(lines->n);
  if (lines_con_comps == NULL) {
    return false;
  }

  for (int line = 0; line < lines->n; line++) {
    // if the line is not valid
    if (ValidLine(lines->pix[line], lines->boxa->box[line]) == false) {
      // split it
      Pixa *split_lines = SplitLine(lines->pix[line],
          lines->boxa->box[line]);

      // remove the old line
      if (pixaRemovePix(lines, line) != 0) {
        return false;
      }

      line--;

      if (split_lines == NULL) {
        continue;
      }

      // add the split lines instead and move the pointer
      for (int s_line = 0; s_line < split_lines->n; s_line++) {
        Pix *sp_line = pixaGetPix(split_lines, s_line, L_CLONE);
        Box *sp_box = boxaGetBox(split_lines->boxa, s_line, L_CLONE);

        if (sp_line == NULL || sp_box == NULL) {
          return false;
        }

        // insert the new line
        if (pixaInsertPix(lines, ++line, sp_line, sp_box) != 0) {
          return false;
        }
      }

      // remove the split lines
      pixaDestroy(&split_lines);
    }
  }

  // compute the concomps bboxes of each line
  for (int line = 0; line < lines->n; line++) {
    Boxa *line_con_comps = ComputeLineConComps(lines->pix[line],
        lines->boxa->box[line], NULL);

    if (line_con_comps == NULL) {
      return false;
    }

    // insert it into the boxaa array
    if (boxaaAddBoxa(lines_con_comps, line_con_comps, L_INSERT) != 0) {
      return false;
    }
  }

  // post process the lines:
  // merge the contents of "small" lines info legitimate lines
  for (int line = 0; line < lines->n; line++) {
    // a small line detected
    if (SmallLine(lines->boxa->box[line]) == true) {
      // merge its components to one of the valid lines
      if (MergeLine(lines->pix[line], lines->boxa->box[line],
          lines, lines_con_comps) == true) {
        // remove the small line
        if (pixaRemovePix(lines, line) != 0) {
          return false;
        }

        if (boxaaRemoveBoxa(lines_con_comps, line) != 0) {
          return false;
        }

        line--;
      }
    }
  }

  boxaaDestroy(&lines_con_comps);

  // add the pix masks
  if (pixaaAddPixa(columns_, lines, L_INSERT) != 0) {
    return false;
  }

  return true;
}
Exemplo n.º 12
0
/*!
 *  boxaSort2d()
 *
 *      Input:  boxas
 *              &naa (<optional return> numaa with sorted indices
 *                    whose values are the indices of the input array)
 *              delta1 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 1)
 *              delta2 (min overlap that permits aggregation of a box
 *                      onto a boxa of horizontally-aligned boxes; pass 2)
 *              minh1 (components less than this height either join an
 *                     existing boxa or are set aside for pass 2)
 *      Return: boxaa (2d sorted version of boxa), or null on error
 *
 *  Notes:
 *      (1) The final result is a sort where the 'fast scan' direction is
 *          left to right, and the 'slow scan' direction is from top
 *          to bottom.  Each boxa in the boxaa represents a sorted set
 *          of boxes from left to right.
 *      (2) Two passes are used to aggregate the boxas, which can corresond
 *          to characters or words in a line of text.  In pass 1, only
 *          taller components, which correspond to xheight or larger,
 *          are permitted to start a new boxa, whereas in pass 2,
 *          the remaining vertically-challenged components are allowed
 *          to join an existing boxa or start a new one.
 *      (3) If delta1 < 0, the first pass allows aggregation when
 *          boxes in the same boxa do not overlap vertically.
 *          The distance by which they can miss and still be aggregated
 *          is the absolute value |delta1|.   Similar for delta2 on
 *          the second pass.
 *      (4) On the first pass, any component of height less than minh1
 *          cannot start a new boxa; it's put aside for later insertion.
 *      (5) On the second pass, any small component that doesn't align
 *          with an existing boxa can start a new one.
 *      (6) This can be used to identify lines of text from
 *          character or word bounding boxes.
 */
BOXAA *
boxaSort2d(BOXA    *boxas,
           NUMAA  **pnaad,
           l_int32  delta1,
           l_int32  delta2,
           l_int32  minh1)
{
l_int32  i, index, h, nt, ne, n, m, ival;
BOX     *box;
BOXA    *boxa, *boxae, *boxan, *boxat1, *boxat2, *boxav, *boxavs;
BOXAA   *baa, *baad;
NUMA    *naindex, *nae, *nan, *nah, *nav, *nat1, *nat2, *nad;
NUMAA   *naa, *naad;

    PROCNAME("boxaSort2d");

    if (pnaad) *pnaad = NULL;
    if (!boxas)
        return (BOXAA *)ERROR_PTR("boxas not defined", procName, NULL);

        /* Sort from left to right */
    if ((boxa = boxaSort(boxas, L_SORT_BY_X, L_SORT_INCREASING, &naindex))
                    == NULL)
        return (BOXAA *)ERROR_PTR("boxa not made", procName, NULL);

        /* First pass: assign taller boxes to boxa by row */
    nt = boxaGetCount(boxa);
    baa = boxaaCreate(0);
    naa = numaaCreate(0);
    boxae = boxaCreate(0);  /* save small height boxes here */
    nae = numaCreate(0);  /* keep track of small height boxes */
    for (i = 0; i < nt; i++) {
        box = boxaGetBox(boxa, i, L_CLONE);
        boxGetGeometry(box, NULL, NULL, NULL, &h);
        if (h < minh1) {  /* save for 2nd pass */
            boxaAddBox(boxae, box, L_INSERT);
            numaAddNumber(nae, i);
        }
        else {
            n = boxaaGetCount(baa);
            boxaaAlignBox(baa, box, delta1, &index);
            if (index < n) {  /* append to an existing boxa */
                boxaaAddBox(baa, index, box, L_INSERT);
            }
            else {  /* doesn't align, need new boxa */
                boxan = boxaCreate(0);
                boxaAddBox(boxan, box, L_INSERT);
                boxaaAddBoxa(baa, boxan, L_INSERT);
                nan = numaCreate(0);
                numaaAddNuma(naa, nan, L_INSERT);
            }
            numaGetIValue(naindex, i, &ival);
            numaaAddNumber(naa, index, ival);
        }
    }
    boxaDestroy(&boxa);
    numaDestroy(&naindex);

        /* Second pass: feed in small height boxes;
         * TODO: this correctly, using local y position! */
    ne = boxaGetCount(boxae);
    for (i = 0; i < ne; i++) {
        box = boxaGetBox(boxae, i, L_CLONE);
        n = boxaaGetCount(baa);
        boxaaAlignBox(baa, box, delta2, &index);
        if (index < n) {  /* append to an existing boxa */
            boxaaAddBox(baa, index, box, L_INSERT);
        }
        else {  /* doesn't align, need new boxa */
            boxan = boxaCreate(0);
            boxaAddBox(boxan, box, L_INSERT);
            boxaaAddBoxa(baa, boxan, L_INSERT);
            nan = numaCreate(0);
            numaaAddNuma(naa, nan, L_INSERT);
        }
        numaGetIValue(nae, i, &ival);  /* location in original boxas */
        numaaAddNumber(naa, index, ival);
    }

        /* Sort each boxa in the boxaa */
    m = boxaaGetCount(baa);
    for (i = 0; i < m; i++) {
        boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
        boxat2 = boxaSort(boxat1, L_SORT_BY_X, L_SORT_INCREASING, &nah);
        boxaaReplaceBoxa(baa, i, boxat2);
        nat1 = numaaGetNuma(naa, i, L_CLONE);
        nat2 = numaSortByIndex(nat1, nah);
        numaaReplaceNuma(naa, i, nat2);
        boxaDestroy(&boxat1);
        numaDestroy(&nat1);
        numaDestroy(&nah);
    }

        /* Sort boxa vertically within boxaa, using the first box
         * in each boxa. */
    m = boxaaGetCount(baa);
    boxav = boxaCreate(m);  /* holds first box in each boxa in baa */
    naad = numaaCreate(m);
    if (pnaad)
        *pnaad = naad;
    baad = boxaaCreate(m);
    for (i = 0; i < m; i++) {
        boxat1 = boxaaGetBoxa(baa, i, L_CLONE);
        box = boxaGetBox(boxat1, 0, L_CLONE);
        boxaAddBox(boxav, box, L_INSERT);
        boxaDestroy(&boxat1);
    }
    boxavs = boxaSort(boxav, L_SORT_BY_Y, L_SORT_INCREASING, &nav);
    for (i = 0; i < m; i++) {
        numaGetIValue(nav, i, &index);
        boxa = boxaaGetBoxa(baa, index, L_CLONE);
        boxaaAddBoxa(baad, boxa, L_INSERT);
        nad = numaaGetNuma(naa, index, L_CLONE);
        numaaAddNuma(naad, nad, L_INSERT);
    }

/*    fprintf(stderr, "box count = %d, numaa count = %d\n", nt,
            numaaGetNumberCount(naad)); */

    boxaaDestroy(&baa);
    boxaDestroy(&boxav);
    boxaDestroy(&boxavs);
    boxaDestroy(&boxae);
    numaDestroy(&nav);
    numaDestroy(&nae);
    numaaDestroy(&naa);
    if (!pnaad)
        numaaDestroy(&naad);

    return baad;
}