/*!
 * \brief   l_asetCreateFromSarray()
 *
 * \param[in]    sa
 * \return  set using a string hash into a uint32 as the key
 */
L_ASET *
l_asetCreateFromSarray(SARRAY  *sa)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;

    PROCNAME("l_asetCreateFromSarray");

    if (!sa)
        return (L_ASET *)ERROR_PTR("sa not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    n = sarrayGetCount(sa);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        l_asetInsert(set, key);
    }

    return set;
}
示例#2
0
/*!
 *  ptaRemoveDupsByAset()
 *
 *      Input:  ptas (assumed to be integer values)
 *      Return: ptad (with duplicates removed), or null on error
 *
 *  Notes:
 *      (1) This is slower than ptaRemoveDupsByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 */
PTA *
ptaRemoveDupsByAset(PTA  *ptas)
{
l_int32   i, n, x, y;
PTA      *ptad;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;

    PROCNAME("ptaRemoveDupsByAset");

    if (!ptas)
        return (PTA *)ERROR_PTR("ptas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    n = ptaGetCount(ptas);
    ptad = ptaCreate(n);
    for (i = 0; i < n; i++) {
        ptaGetIPt(ptas, i, &x, &y);
        l_hashPtToUint64(x, y, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            ptaAddPt(ptad, x, y);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return ptad;
}
/*!
 * \brief   sarrayRemoveDupsByAset()
 *
 * \param[in]    sas
 * \return  sad with duplicates removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This is O(nlogn), considerably slower than
 *          sarrayRemoveDupsByHash() for large string arrays.
 *      (2) The key for each string is a 64-bit hash.
 *      (3) Build a set, using hashed strings as keys.  As the set is
 *          built, first do a find; if not found, add the key to the
 *          set and add the string to the output sarray.
 * </pre>
 */
SARRAY *
sarrayRemoveDupsByAset(SARRAY  *sas)
{
char     *str;
l_int32   i, n;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;
SARRAY   *sad;

    PROCNAME("sarrayRemoveDupsByAset");

    if (!sas)
        return (SARRAY *)ERROR_PTR("sas not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    sad = sarrayCreate(0);
    n = sarrayGetCount(sas);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sas, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (!l_asetFind(set, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return sad;
}
示例#4
0
/*!
 * \brief   l_dnaRemoveDupsByAset()
 *
 * \param[in]    das
 * \return  dad with duplicates removed, or NULL on error
 */
L_DNA *
l_dnaRemoveDupsByAset(L_DNA  *das)
{
l_int32    i, n;
l_float64  val;
L_DNA     *dad;
L_ASET    *set;
RB_TYPE    key;

    PROCNAME("l_dnaRemoveDupsByAset");

    if (!das)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);

    set = l_asetCreate(L_FLOAT_TYPE);
    dad = l_dnaCreate(0);
    n = l_dnaGetCount(das);
    for (i = 0; i < n; i++) {
        l_dnaGetDValue(das, i, &val);
        key.ftype = val;
        if (!l_asetFind(set, key)) {
            l_dnaAddNumber(dad, val);
            l_asetInsert(set, key);
        }
    }

    l_asetDestroy(&set);
    return dad;
}
/*!
 * \brief   sarrayIntersectionByAset()
 *
 * \param[in]    sa1, sa2
 * \return  sad with the intersection of the string set, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Algorithm: put the smaller sarray into a set, using the string
 *          hashes as the key values.  Then run through the larger sarray,
 *          building an output sarray and a second set from the strings
 *          in the larger array: if a string is in the first set but
 *          not in the second, add the string to the output sarray and hash
 *          it into the second set.  The second set is required to make
 *          sure only one instance of each string is put into the output sarray.
 *          This is O(mlogn), {m,n} = sizes of {smaller,larger} input arrays.
 * </pre>
 */
SARRAY *
sarrayIntersectionByAset(SARRAY  *sa1,
                         SARRAY  *sa2)
{
char     *str;
l_int32   n1, n2, i, n;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
SARRAY   *sa_small, *sa_big, *sad;

    PROCNAME("sarrayIntersectionByAset");

    if (!sa1)
        return (SARRAY *)ERROR_PTR("sa1 not defined", procName, NULL);
    if (!sa2)
        return (SARRAY *)ERROR_PTR("sa2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = sarrayGetCount(sa1);
    n2 = sarrayGetCount(sa2);
    sa_small = (n1 < n2) ? sa1 : sa2;   /* do not destroy sa_small */
    sa_big = (n1 < n2) ? sa2 : sa1;   /* do not destroy sa_big */
    set1 = l_asetCreateFromSarray(sa_big);

        /* Build up the intersection of strings */
    sad = sarrayCreate(0);
    n = sarrayGetCount(sa_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        str = sarrayGetString(sa_small, i, L_NOCOPY);
        l_hashStringToUint64(str, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            sarrayAddString(sad, str, L_COPY);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return sad;
}
示例#6
0
/*!
 *  ptaIntersectionByAset()
 *
 *      Input:  pta1, pta2
 *      Return: ptad (intersection of the point sets), or null on error
 *
 *  Notes:
 *      (1) See sarrayIntersectionByAset() for the approach.
 *      (2) The key is a 64-bit hash from the (x,y) pair.
 *      (3) This is slower than ptaIntersectionByHash(), mostly because
 *          of the nlogn sort to build up the rbtree.  Do not use for
 *          large numbers of points (say, > 1M).
 */
PTA *
ptaIntersectionByAset(PTA  *pta1,
                      PTA  *pta2)
{
l_int32   n1, n2, i, n, x, y;
l_uint64  hash;
L_ASET   *set1, *set2;
RB_TYPE   key;
PTA      *pta_small, *pta_big, *ptad;

    PROCNAME("ptaIntersectionByAset");

    if (!pta1)
        return (PTA *)ERROR_PTR("pta1 not defined", procName, NULL);
    if (!pta2)
        return (PTA *)ERROR_PTR("pta2 not defined", procName, NULL);

        /* Put the elements of the biggest array into a set */
    n1 = ptaGetCount(pta1);
    n2 = ptaGetCount(pta2);
    pta_small = (n1 < n2) ? pta1 : pta2;   /* do not destroy pta_small */
    pta_big = (n1 < n2) ? pta2 : pta1;   /* do not destroy pta_big */
    set1 = l_asetCreateFromPta(pta_big);

        /* Build up the intersection of points */
    ptad = ptaCreate(0);
    n = ptaGetCount(pta_small);
    set2 = l_asetCreate(L_UINT_TYPE);
    for (i = 0; i < n; i++) {
        ptaGetIPt(pta_small, i, &x, &y);
        l_hashPtToUint64(x, y, &hash);
        key.utype = hash;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            ptaAddPt(ptad, x, y);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return ptad;
}
示例#7
0
文件: settest.c 项目: chewi/leptonica
static L_ASET *
BuildSet(PIX     *pix,
         l_int32  factor,
         l_int32  print)
{
l_int32    i, j, w, h, wpl, val;
l_uint32   val32;
l_uint32  *data, *line;
L_ASET    *s;
PIXCMAP   *cmap;
RB_TYPE    key;
RB_TYPE   *pval;

    fprintf(stderr, "\n --------------- Begin building set --------------\n");
    s = l_asetCreate(L_UINT_TYPE);
    data = pixGetData(pix);
    wpl = pixGetWpl(pix);
    cmap = pixGetColormap(pix);
    pixGetDimensions(pix, &w, &h, NULL);
    for (i = 0; i < h; i += factor) {
        line = data + i * wpl;
        for (j = 0; j < w; j += factor) {
            if (cmap) {
                val = GET_DATA_BYTE(line, j);
                pixcmapGetColor32(cmap, val, &val32);
                key.utype = val32;
            } else {
                key.utype = line[j];
            }
            pval = l_asetFind(s, key);
            if (pval && print)
                fprintf(stderr, "key = %llx\n", key.utype);
            l_asetInsert(s, key);
        }
    }
    fprintf(stderr, "Size: %d\n", l_asetSize(s));
    if (print)
        l_rbtreePrint(stderr, s);
    fprintf(stderr, " ----------- End Building set -----------------\n");

    return s;
}
示例#8
0
/*!
 * \brief   l_dnaIntersectionByAset()
 *
 * \param[in]    da1, da2
 * \return  dad with the intersection of the two arrays, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See sarrayIntersection() for the approach.
 *      (2) Here, the key in building the sorted tree is the number itself.
 *      (3) Operations using an underlying tree are O(nlogn), which is
 *          typically less efficient than hashing, which is O(n).
 * </pre>
 */
L_DNA *
l_dnaIntersectionByAset(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32    n1, n2, i, n;
l_float64  val;
L_ASET    *set1, *set2;
RB_TYPE    key;
L_DNA     *da_small, *da_big, *dad;

    PROCNAME("l_dnaIntersectionByAset");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Put the elements of the largest array into a set */
    n1 = l_dnaGetCount(da1);
    n2 = l_dnaGetCount(da2);
    da_small = (n1 < n2) ? da1 : da2;   /* do not destroy da_small */
    da_big = (n1 < n2) ? da2 : da1;   /* do not destroy da_big */
    set1 = l_asetCreateFromDna(da_big);

        /* Build up the intersection of floats */
    dad = l_dnaCreate(0);
    n = l_dnaGetCount(da_small);
    set2 = l_asetCreate(L_FLOAT_TYPE);
    for (i = 0; i < n; i++) {
        l_dnaGetDValue(da_small, i, &val);
        key.ftype = val;
        if (l_asetFind(set1, key) && !l_asetFind(set2, key)) {
            l_dnaAddNumber(dad, val);
            l_asetInsert(set2, key);
        }
    }

    l_asetDestroy(&set1);
    l_asetDestroy(&set2);
    return dad;
}
示例#9
0
/*!
 * \brief   l_asetCreateFromDna()
 *
 * \param[in]    da source dna
 * \return  set using the doubles in %da as keys
 */
L_ASET *
l_asetCreateFromDna(L_DNA  *da)
{
l_int32    i, n;
l_float64  val;
L_ASET    *set;
RB_TYPE    key;

    PROCNAME("l_asetCreateFromDna");

    if (!da)
        return (L_ASET *)ERROR_PTR("da not defined", procName, NULL);

    set = l_asetCreate(L_FLOAT_TYPE);
    n = l_dnaGetCount(da);
    for (i = 0; i < n; i++) {
        l_dnaGetDValue(da, i, &val);
        key.ftype = val;
        l_asetInsert(set, key);
    }

    return set;
}
示例#10
0
/*!
 *  l_asetCreateFromPta()
 *
 *      Input:  pta
 *      Return: set (using a 64-bit hash of (x,y) as the key)
 */
L_ASET *
l_asetCreateFromPta(PTA  *pta)
{
l_int32   i, n, x, y;
l_uint64  hash;
L_ASET   *set;
RB_TYPE   key;

    PROCNAME("l_asetCreateFromPta");

    if (!pta)
        return (L_ASET *)ERROR_PTR("pta not defined", procName, NULL);

    set = l_asetCreate(L_UINT_TYPE);
    n = ptaGetCount(pta);
    for (i = 0; i < n; i++) {
        ptaGetIPt(pta, i, &x, &y);
        l_hashPtToUint64(x, y, &hash);
        key.utype = hash;
        l_asetInsert(set, key);
    }

    return set;
}
示例#11
0
/*!
 * \brief   pixGetSortedNeighborValues()
 *
 * \param[in]     pixs 8, 16 or 32 bpp, with pixels labeled by c.c.
 * \param[in]     x, y location of pixel
 * \param[in]     conn 4 or 8 connected neighbors
 * \param[out]    pneigh array of integers, to be filled with
 *                      the values of the neighbors, if any
 * \param[out]    pnvals the number of unique neighbor values found
 * \return   0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) The returned %neigh array is the unique set of neighboring
 *          pixel values, of size nvals, sorted from smallest to largest.
 *          The value 0, which represents background pixels that do
 *          not belong to any set of connected components, is discarded.
 *      (2) If there are no neighbors, this returns %neigh = NULL; otherwise,
 *          the caller must free the array.
 *      (3) For either 4 or 8 connectivity, the maximum number of unique
 *          neighbor values is 4.
 * </pre>
 */
l_int32
pixGetSortedNeighborValues(PIX       *pixs,
                           l_int32    x,
                           l_int32    y,
                           l_int32    conn,
                           l_int32  **pneigh,
                           l_int32   *pnvals)
{
l_int32       i, npt, index;
l_int32       neigh[4];
l_uint32      val;
l_float32     fx, fy;
L_ASET       *aset;
L_ASET_NODE  *node;
PTA          *pta;
RB_TYPE       key;

    PROCNAME("pixGetSortedNeighborValues");

    if (pneigh) *pneigh = NULL;
    if (pnvals) *pnvals = 0;
    if (!pneigh || !pnvals)
        return ERROR_INT("&neigh and &nvals not both defined", procName, 1);
    if (!pixs || pixGetDepth(pixs) < 8)
        return ERROR_INT("pixs not defined or depth < 8", procName, 1);

        /* Identify the locations of nearest neighbor pixels */
    if ((pta = ptaGetNeighborPixLocs(pixs, x, y, conn)) == NULL)
        return ERROR_INT("pta of neighbors not made", procName, 1);

        /* Find the pixel values and insert into a set as keys */
    aset = l_asetCreate(L_UINT_TYPE);
    npt = ptaGetCount(pta);
    for (i = 0; i < npt; i++) {
        ptaGetPt(pta, i, &fx, &fy);
        pixGetPixel(pixs, (l_int32)fx, (l_int32)fy, &val);
        key.utype = val;
        l_asetInsert(aset, key);
    }

        /* Extract the set keys and put them into the %neigh array.
         * Omit the value 0, which indicates the pixel doesn't
         * belong to one of the sets of connected components. */
    node = l_asetGetFirst(aset);
    index = 0;
    while (node) {
        val = node->key.utype;
        if (val > 0)
            neigh[index++] = (l_int32)val;
        node = l_asetGetNext(node);
    }
    *pnvals = index;
    if (index > 0) {
        *pneigh = (l_int32 *)LEPT_CALLOC(index, sizeof(l_int32));
        for (i = 0; i < index; i++)
            (*pneigh)[i] = neigh[i];
    }

    ptaDestroy(&pta);
    l_asetDestroy(&aset);
    return 0;
}