Exemplo n.º 1
0
l_int32 main(int    argc,
             char **argv)
{
L_ASET  *s;
PIX     *pix;

    setLeptDebugOK(1);
    pix = pixRead("weasel8.240c.png");

        /* Build the set from all the pixels. */
    s = BuildSet(pix, 1, FALSE);
    TestSetIterator(s, FALSE);
    l_asetDestroy(&s);

        /* Ditto, but just with a few pixels */
    s = BuildSet(pix, 10, TRUE);
    TestSetIterator(s, TRUE);
    l_asetDestroy(&s);
    pixDestroy(&pix);

    pix = pixRead("marge.jpg");
    startTimer();
    s = BuildSet(pix, 1, FALSE);
    fprintf(stderr, "Time (250K pixels): %7.3f sec\n", stopTimer());
    TestSetIterator(s, FALSE);
    l_asetDestroy(&s);
    pixDestroy(&pix);
    return 0;
}
Exemplo n.º 2
0
/*!
 *  ptaRemoveDupsByAset()
 *
 *      Input:  ptas (assumed to be integer values)
 *      Return: ptad (with duplicates removed), or null on error
 *
 *  Notes:
 *      (1) This is slower than ptaRemoveDupsByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 */
PTA *
ptaRemoveDupsByAset(PTA  *ptas)
{
l_int32   i, n, x, y;
PTA      *ptad;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;

    PROCNAME("ptaRemoveDupsByAset");

    if (!ptas)
        return (PTA *)ERROR_PTR("ptas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    n = ptaGetCount(ptas);
    ptad = ptaCreate(n);
    for (i = 0; i < n; i++) {
        ptaGetIPt(ptas, i, &x, &y);
        l_hashPtToUint64(x, y, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            ptaAddPt(ptad, x, y);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return ptad;
}
/*!
 * \brief   sarrayRemoveDupsByAset()
 *
 * \param[in]    sas
 * \return  sad with duplicates removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is O(nlogn), considerably slower than
 *          sarrayRemoveDupsByHash() for large string arrays.
 *      (2) The key for each string is a 64-bit hash.
 *      (3) Build a set, using hashed strings as keys.  As the set is
 *          built, first do a find; if not found, add the key to the
 *          set and add the string to the output sarray.
 * </pre>
 */
SARRAY *
sarrayRemoveDupsByAset(SARRAY  *sas)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;
SARRAY   *sad;

    PROCNAME("sarrayRemoveDupsByAset");

    if (!sas)
        return (SARRAY *)ERROR_PTR("sas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    sad = sarrayCreate(0);
    n = sarrayGetCount(sas);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return sad;
}
Exemplo n.º 4
0
/*!
 * \brief   l_dnaRemoveDupsByAset()
 *
 * \param[in]    das
 * \return  dad with duplicates removed, or NULL on error
 */
L_DNA *
l_dnaRemoveDupsByAset(L_DNA  *das)
{
l_int32    i, n;
l_float64  val;
L_DNA     *dad;
L_ASET    *set;
RB_TYPE    key;

    PROCNAME("l_dnaRemoveDupsByAset");

    if (!das)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);

    set = l_asetCreate(L_FLOAT_TYPE);
    dad = l_dnaCreate(0);
    n = l_dnaGetCount(das);
    for (i = 0; i < n; i++) {
        l_dnaGetDValue(das, i, &val);
        key.ftype = val;
        if (!l_asetFind(set, key)) {
            l_dnaAddNumber(dad, val);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return dad;
}
/*!
 * \brief   sarrayIntersectionByAset()
 *
 * \param[in]    sa1, sa2
 * \return  sad with the intersection of the string set, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Algorithm: put the smaller sarray into a set, using the string
 *          hashes as the key values.  Then run through the larger sarray,
 *          building an output sarray and a second set from the strings
 *          in the larger array: if a string is in the first set but
 *          not in the second, add the string to the output sarray and hash
 *          it into the second set.  The second set is required to make
 *          sure only one instance of each string is put into the output sarray.
 *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
 * </pre>
 */
SARRAY *
sarrayIntersectionByAset(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char     *str;
l_int32   n1, n2, i, n;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
SARRAY   *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByAset");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    set1 = l_asetCreateFromSarray(sa_big);

        /* Build up the intersection of strings */
    sad = sarrayCreate(0);
    n = sarrayGetCount(sa_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return sad;
}
Exemplo n.º 6
0
/*!
 *  ptaIntersectionByAset()
 *
 *      Input:  pta1, pta2
 *      Return: ptad (intersection of the point sets), or null on error
 *
 *  Notes:
 *      (1) See sarrayIntersectionByAset() for the approach.
 *      (2) The key is a 64-bit hash from the (x,y) pair.
 *      (3) This is slower than ptaIntersectionByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 */
PTA *
ptaIntersectionByAset(PTA  *pta1,
                      PTA  *pta2)
{
l_int32   n1, n2, i, n, x, y;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
PTA      *pta_small, *pta_big, *ptad;

    PROCNAME("ptaIntersectionByAset");

    if (!pta1)
        return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL);
    if (!pta2)
        return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = ptaGetCount(pta1);
    n2 = ptaGetCount(pta2);
    pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */
    pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */
    set1 = l_asetCreateFromPta(pta_big);

        /* Build up the intersection of points */
    ptad = ptaCreate(0);
    n = ptaGetCount(pta_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        ptaGetIPt(pta_small, i, &x, &y);
        l_hashPtToUint64(x, y, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            ptaAddPt(ptad, x, y);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return ptad;
}
Exemplo n.º 7
0
/*!
 * \brief   l_dnaIntersectionByAset()
 *
 * \param[in]    da1, da2
 * \return  dad with the intersection of the two arrays, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See sarrayIntersection() for the approach.
 *      (2) Here, the key in building the sorted tree is the number itself.
 *      (3) Operations using an underlying tree are O(nlogn), which is
 *          typically less efficient than hashing, which is O(n).
 * </pre>
 */
L_DNA *
l_dnaIntersectionByAset(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32    n1, n2, i, n;
l_float64  val;
L_ASET    *set1, *set2;
RB_TYPE    key;
L_DNA     *da_small, *da_big, *dad;

    PROCNAME("l_dnaIntersectionByAset");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Put the elements of the largest array into a set */
    n1 = l_dnaGetCount(da1);
    n2 = l_dnaGetCount(da2);
    da_small = (n1 < n2) ? da1 : da2;   /* do not destroy da_small */
    da_big = (n1 < n2) ? da2 : da1;   /* do not destroy da_big */
    set1 = l_asetCreateFromDna(da_big);

        /* Build up the intersection of floats */
    dad = l_dnaCreate(0);
    n = l_dnaGetCount(da_small);
    set2 = l_asetCreate(L_FLOAT_TYPE);
    for (i = 0; i < n; i++) {
        l_dnaGetDValue(da_small, i, &val);
        key.ftype = val;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            l_dnaAddNumber(dad, val);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return dad;
}
Exemplo n.º 8
0
l_int32 main(int    argc,
             char **argv)
{
    L_ASET     *set;
    L_DNA      *da1, *da2, *da3, *da4, *da5, *da6, *da7, *da8, *dav, *dac;
    L_DNAHASH  *dahash;
    NUMA       *nav, *nac;
    PTA        *pta1, *pta2, *pta3;
    SARRAY     *sa1, *sa2, *sa3, *sa4;

    lept_mkdir("lept/hash");

#if 1
    /* Test string hashing with aset */
    fprintf(stderr, "Set results with string hashing:\n");
    sa1 = BuildShortStrings(3, 0);
    sa2 = BuildShortStrings(3, 1);
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    set = l_asetCreateFromSarray(sa2);
    fprintf(stderr, "  time to make set: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size of set without dups: %d\n", l_asetSize(set));
    l_asetDestroy(&set);
    startTimer();
    sa3 = sarrayRemoveDupsByAset(sa2);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByAset(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);

    /* Test sarray set operations with dna hash.
     * We use the same hash function as is used with aset. */
    fprintf(stderr, "\nDna hash results for sarray:\n");
    fprintf(stderr, "  size with unique strings: %d\n", sarrayGetCount(sa1));
    fprintf(stderr, "  size with dups: %d\n", sarrayGetCount(sa2));
    startTimer();
    dahash = l_dnaHashCreateFromSarray(sa2);
    fprintf(stderr, "  time to make hashmap: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  entries in hashmap with dups: %d\n",
            l_dnaHashGetTotalCount(dahash));
    l_dnaHashDestroy(&dahash);
    startTimer();
    sarrayRemoveDupsByHash(sa2, &sa3, NULL);
    fprintf(stderr, "  time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", sarrayGetCount(sa3));
    startTimer();
    sa4 = sarrayIntersectionByHash(sa1, sa2);
    fprintf(stderr, "  time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", sarrayGetCount(sa4));
    sarrayDestroy(&sa3);
    sarrayDestroy(&sa4);
    sarrayDestroy(&sa1);
    sarrayDestroy(&sa2);
#endif

#if 1
    /* Test point hashing with aset.
     * Enter all points within a 1500 x 1500 image in pta1, and include
     * 450,000 duplicates in pta2.  With this pt hashing function,
     * there are no hash collisions among any of the 400 million pixel
     * locations in a 20000 x 20000 image. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nSet results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    pta3 = ptaRemoveDupsByAset(pta2);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByAset(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

#if 1
    /* Test pta set operations with dna hash, using the same pt hashing
     * function.  Although there are no collisions in 20K x 20K images,
     * the dna hash implementation works properly even if there are some. */
    pta1 = BuildPointSet(1500, 1500, 0);
    pta2 = BuildPointSet(1500, 1500, 1);
    fprintf(stderr, "\nDna hash results for pta:\n");
    fprintf(stderr, "  pta1 size with unique points: %d\n", ptaGetCount(pta1));
    fprintf(stderr, "  pta2 size with dups: %d\n", ptaGetCount(pta2));
    startTimer();
    ptaRemoveDupsByHash(pta2, &pta3, NULL);
    fprintf(stderr, "  Time to remove dups: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  size without dups = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta3);

    startTimer();
    pta3 = ptaIntersectionByHash(pta1, pta2);
    fprintf(stderr, "  Time to intersect: %5.3f sec\n", stopTimer());
    fprintf(stderr, "  intersection size = %d\n", ptaGetCount(pta3));
    ptaDestroy(&pta1);
    ptaDestroy(&pta2);
    ptaDestroy(&pta3);
#endif

    /* Test dna set and histo operations using dna hash */
#if 1
    fprintf(stderr, "\nDna hash results for dna:\n");
    da1 = l_dnaMakeSequence(0.0, 0.125, 8000);
    da2 = l_dnaMakeSequence(300.0, 0.125, 8000);
    da3 = l_dnaMakeSequence(600.0, 0.125, 8000);
    da4 = l_dnaMakeSequence(900.0, 0.125, 8000);
    da5 = l_dnaMakeSequence(1200.0, 0.125, 8000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);
    l_dnaJoin(da1, da4, 0, -1);
    l_dnaJoin(da1, da5, 0, -1);
    l_dnaRemoveDupsByHash(da1, &da6, &dahash);
    l_dnaHashDestroy(&dahash);
    fprintf(stderr, "  dna size with dups = %d\n", l_dnaGetCount(da1));
    fprintf(stderr, "  dna size of unique numbers = %d\n", l_dnaGetCount(da6));
    l_dnaMakeHistoByHash(da1, &dahash, &dav, &dac);
    nav = l_dnaConvertToNuma(dav);
    nac = l_dnaConvertToNuma(dac);
    fprintf(stderr, "  dna number of histo points = %d\n", l_dnaGetCount(dac));
    gplotSimpleXY1(nav, nac, GPLOT_IMPULSES, GPLOT_PNG,
                   "/tmp/lept/hash/histo", "Histo");
    da7 = l_dnaIntersectionByHash(da2, da3);
    fprintf(stderr, "  dna number of points: da2 = %d, da3 = %d\n",
            l_dnaGetCount(da2), l_dnaGetCount(da3));
    fprintf(stderr, "  dna number of da2/da3 intersection points = %d\n",
            l_dnaGetCount(da7));
    l_fileDisplay("/tmp/lept/hash/histo.png", 700, 100, 1.0);
    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&dac);
    l_dnaDestroy(&dav);
    l_dnaHashDestroy(&dahash);
    numaDestroy(&nav);
    numaDestroy(&nac);
#endif

#if 1
    da1 = l_dnaMakeSequence(0, 3, 10000);
    da2 = l_dnaMakeSequence(0, 5, 10000);
    da3 = l_dnaMakeSequence(0, 7, 10000);
    l_dnaJoin(da1, da2, 0, -1);
    l_dnaJoin(da1, da3, 0, -1);

    fprintf(stderr, "\nDna results using set:\n");
    fprintf(stderr, "  da1 count: %d\n", l_dnaGetCount(da1));
    set = l_asetCreateFromDna(da1);
    fprintf(stderr, "  da1 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da4 = l_dnaUnionByAset(da2, da3);
    fprintf(stderr, "  da4 count: %d\n", l_dnaGetCount(da4));
    set = l_asetCreateFromDna(da4);
    fprintf(stderr, "  da4 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da5 = l_dnaIntersectionByAset(da1, da2);
    fprintf(stderr, "  da5 count: %d\n", l_dnaGetCount(da5));
    set = l_asetCreateFromDna(da5);
    fprintf(stderr, "  da5 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da6 = l_dnaMakeSequence(100000, 11, 5000);
    l_dnaJoin(da6, da1, 0, -1);
    fprintf(stderr, "  da6 count: %d\n", l_dnaGetCount(da6));
    set = l_asetCreateFromDna(da6);
    fprintf(stderr, "  da6 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da7 = l_dnaIntersectionByAset(da6, da3);
    fprintf(stderr, "  da7 count: %d\n", l_dnaGetCount(da7));
    set = l_asetCreateFromDna(da7);
    fprintf(stderr, "  da7 set size: %d\n\n", l_asetSize(set));
    l_asetDestroy(&set);

    da8 = l_dnaRemoveDupsByAset(da1);
    fprintf(stderr, "  da8 count: %d\n\n", l_dnaGetCount(da8));

    l_dnaDestroy(&da1);
    l_dnaDestroy(&da2);
    l_dnaDestroy(&da3);
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);
    l_dnaDestroy(&da6);
    l_dnaDestroy(&da7);
    l_dnaDestroy(&da8);
#endif

    return 0;
}
Exemplo n.º 9
0
/*!
 * \brief   pixGetSortedNeighborValues()
 *
 * \param[in]     pixs 8, 16 or 32 bpp, with pixels labeled by c.c.
 * \param[in]     x, y location of pixel
 * \param[in]     conn 4 or 8 connected neighbors
 * \param[out]    pneigh array of integers, to be filled with
 *                      the values of the neighbors, if any
 * \param[out]    pnvals the number of unique neighbor values found
 * \return   0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The returned %neigh array is the unique set of neighboring
 *          pixel values, of size nvals, sorted from smallest to largest.
 *          The value 0, which represents background pixels that do
 *          not belong to any set of connected components, is discarded.
 *      (2) If there are no neighbors, this returns %neigh = NULL; otherwise,
 *          the caller must free the array.
 *      (3) For either 4 or 8 connectivity, the maximum number of unique
 *          neighbor values is 4.
 * </pre>
 */
l_int32
pixGetSortedNeighborValues(PIX       *pixs,
                           l_int32    x,
                           l_int32    y,
                           l_int32    conn,
                           l_int32  **pneigh,
                           l_int32   *pnvals)
{
l_int32       i, npt, index;
l_int32       neigh[4];
l_uint32      val;
l_float32     fx, fy;
L_ASET       *aset;
L_ASET_NODE  *node;
PTA          *pta;
RB_TYPE       key;

    PROCNAME("pixGetSortedNeighborValues");

    if (pneigh) *pneigh = NULL;
    if (pnvals) *pnvals = 0;
    if (!pneigh || !pnvals)
        return ERROR_INT("&neigh and &nvals not both defined", procName, 1);
    if (!pixs || pixGetDepth(pixs) < 8)
        return ERROR_INT("pixs not defined or depth < 8", procName, 1);

        /* Identify the locations of nearest neighbor pixels */
    if ((pta = ptaGetNeighborPixLocs(pixs, x, y, conn)) == NULL)
        return ERROR_INT("pta of neighbors not made", procName, 1);

        /* Find the pixel values and insert into a set as keys */
    aset = l_asetCreate(L_UINT_TYPE);
    npt = ptaGetCount(pta);
    for (i = 0; i < npt; i++) {
        ptaGetPt(pta, i, &fx, &fy);
        pixGetPixel(pixs, (l_int32)fx, (l_int32)fy, &val);
        key.utype = val;
        l_asetInsert(aset, key);
    }

        /* Extract the set keys and put them into the %neigh array.
         * Omit the value 0, which indicates the pixel doesn't
         * belong to one of the sets of connected components. */
    node = l_asetGetFirst(aset);
    index = 0;
    while (node) {
        val = node->key.utype;
        if (val > 0)
            neigh[index++] = (l_int32)val;
        node = l_asetGetNext(node);
    }
    *pnvals = index;
    if (index > 0) {
        *pneigh = (l_int32 *)LEPT_CALLOC(index, sizeof(l_int32));
        for (i = 0; i < index; i++)
            (*pneigh)[i] = neigh[i];
    }

    ptaDestroy(&pta);
    l_asetDestroy(&aset);
    return 0;
}