コード例 #1
0
ファイル: dnabasic.c プロジェクト: stweil/leptonica-old
/*!
 *  l_dnaJoin()
 *
 *      Input:  dad  (dest dna; add to this one)
 *              das  (<optional> source dna; add from this one)
 *              istart  (starting index in das)
 *              iend  (ending index in das; use -1 to cat all)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
 *      (2) iend < 0 means 'read to the end'.  An iend beyond the last
 *          element of das is clipped to the last element.
 *      (3) if das == NULL, this is a no-op
 *      (4) It is an error if istart > iend after clipping.  This
 *          includes the case where das has no elements.
 *      (5) If appending a number to dad fails, copying stops and 1 is
 *          returned.  Numbers appended before the failure stay in dad.
 */
l_int32
l_dnaJoin(L_DNA   *dad,
          L_DNA   *das,
          l_int32  istart,
          l_int32  iend)
{
l_int32    n, i;
l_float64  val;

    PROCNAME("l_dnaJoin");

    if (!dad)
        return ERROR_INT("dad not defined", procName, 1);
    if (!das)
        return 0;

    if (istart < 0)
        istart = 0;
        /* n is read once, before any append, so the range stays fixed
         * even if dad and das are the same array. */
    n = l_dnaGetCount(das);
    if (iend < 0 || iend >= n)
        iend = n - 1;
    if (istart > iend)
        return ERROR_INT("istart > iend; nothing to add", procName, 1);

    for (i = istart; i <= iend; i++) {
        l_dnaGetDValue(das, i, &val);
            /* Do not report success if dad could not take the value */
        if (l_dnaAddNumber(dad, val) != 0)
            return ERROR_INT("failed to add number to dad", procName, 1);
    }

    return 0;
}
コード例 #2
0
ファイル: dnahash.c プロジェクト: chewi/leptonica
/*!
 * \brief   l_dnaHashCreateFromDna()
 *
 * \param[in]    da    source array of numbers
 * \return  dahash if OK; NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The values stored in the %dahash are indices into %da;
 *          %dahash has no use without %da.
 *      (2) The number of buckets is obtained from
 *          findNextLargerPrime(n / 20), where n is the count of %da.
 *      (3) Every element of %da is entered, so a value that occurs
 *          more than once in %da has one index stored per occurrence.
 * </pre>
 */
L_DNAHASH *
l_dnaHashCreateFromDna(L_DNA  *da)
{
l_int32     i, n;
l_uint32    nsize;
l_uint64    key;
l_float64   val;
L_DNAHASH  *dahash;

    PROCNAME("l_dnaHashCreateFromDna");

    if (!da)
        return (L_DNAHASH *)ERROR_PTR("da not defined", procName, NULL);

    n = l_dnaGetCount(da);
    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */

        /* Fail once here, instead of attempting every add below
         * on a hash that was never made. */
    if ((dahash = l_dnaHashCreate(nsize, 8)) == NULL)
        return (L_DNAHASH *)ERROR_PTR("dahash not made", procName, NULL);

    for (i = 0; i < n; i++) {
        l_dnaGetDValue(da, i, &val);
        l_hashFloat64ToUint64(nsize, val, &key);
            /* Store the position in da, not the number itself */
        l_dnaHashAdd(dahash, key, (l_float64)i);
    }

    return dahash;
}
コード例 #3
0
ファイル: dnafunc1.c プロジェクト: DanBloomberg/leptonica
/*!
 * \brief   l_dnaRemoveDupsByAset()
 *
 * \param[in]    das    source array of numbers
 * \return  dad holding each distinct number of %das once, in order of
 *              first appearance; NULL if %das is not defined
 *
 * <pre>
 * Notes:
 *      (1) A set keyed on the number itself records which numbers
 *          have already been copied into %dad.
 * </pre>
 */
L_DNA *
l_dnaRemoveDupsByAset(L_DNA  *das)
{
l_int32    k, count;
l_float64  dval;
L_DNA     *dad;
L_ASET    *seen;
RB_TYPE    key;

    PROCNAME("l_dnaRemoveDupsByAset");

    if (das == NULL)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);

    seen = l_asetCreate(L_FLOAT_TYPE);
    dad = l_dnaCreate(0);
    count = l_dnaGetCount(das);
    for (k = 0; k < count; k++) {
        l_dnaGetDValue(das, k, &dval);
        key.ftype = dval;
        if (l_asetFind(seen, key))
            continue;  /* already copied to dad */
        l_dnaAddNumber(dad, dval);
        l_asetInsert(seen, key);
    }

        /* The set was only needed while building dad */
    l_asetDestroy(&seen);
    return dad;
}
コード例 #4
0
ファイル: dnahash.c プロジェクト: chewi/leptonica
/*!
 * \brief   l_dnaIntersectionByHash()
 *
 * \param[in]    da1, da2    the two arrays of numbers to intersect
 * \return  dad intersection of the number arrays, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) This uses the same method for building the intersection set
 *          as ptaIntersectionByHash() and sarrayIntersectionByHash().
 *      (2) The array with more elements is hashed.  The other array
 *          is then traversed in order, and each of its numbers that
 *          is found in the hashed array is added to %dad the first
 *          time it is met.  If the counts are equal, %da1 is the one
 *          that is hashed.
 *      (3) Neither input array is modified or destroyed.
 * </pre>
 */
L_DNA *
l_dnaIntersectionByHash(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32     count1, count2, nshort, nb, k, inlong, inseen;
l_uint32    nprime;
l_uint64    hashkey;
l_float64   dval;
L_DNAHASH  *hashlong, *hashseen;
L_DNA      *dashort, *dalong, *dad;

    PROCNAME("l_dnaIntersectionByHash");

    if (da1 == NULL)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (da2 == NULL)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Hash the array that has more elements.  Both dashort and
         * dalong alias the inputs and must not be destroyed. */
    count1 = l_dnaGetCount(da1);
    count2 = l_dnaGetCount(da2);
    if (count1 < count2) {
        dashort = da1;
        dalong = da2;
    } else {
        dashort = da2;
        dalong = da1;
    }
    hashlong = l_dnaHashCreateFromDna(dalong);

        /* Walk the shorter array.  A number goes into %dad when it is
         * in dalong (looked up through hashlong) and has not already
         * been taken from dashort (looked up through hashseen). */
    dad = l_dnaCreate(0);
    nshort = l_dnaGetCount(dashort);
    findNextLargerPrime(nshort / 20, &nprime);  /* buckets in hash table */
    hashseen = l_dnaHashCreate(nprime, 0);
    nb = l_dnaHashGetCount(hashseen);
    for (k = 0; k < nshort; k++) {
        l_dnaGetDValue(dashort, k, &dval);
        l_dnaFindValByHash(dalong, hashlong, dval, &inlong);
        if (inlong < 0)
            continue;  /* not in the other array */
        l_dnaFindValByHash(dashort, hashseen, dval, &inseen);
        if (inseen != -1)
            continue;  /* already in the result */
        l_dnaAddNumber(dad, dval);
        l_hashFloat64ToUint64(nb, dval, &hashkey);
        l_dnaHashAdd(hashseen, hashkey, (l_float64)k);
    }

    l_dnaHashDestroy(&hashlong);
    l_dnaHashDestroy(&hashseen);
    return dad;
}
コード例 #5
0
ファイル: dnahash.c プロジェクト: chewi/leptonica
/*!
 * \brief   l_dnaMakeHistoByHash()
 *
 * \param[in]    das      source array of numbers; must not be empty
 * \param[out]   pdahash  hash map: val --> index
 * \param[out]   pdav     array of values: index --> val
 * \param[out]   pdac     histo array of counts: index --> count
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates and returns a dna of occurrences (histogram),
 *          an aligned dna of values, and an associated hashmap.
 *          The hashmap takes %dav and a value, and points into the
 *          histogram in %dac.
 *      (2) The dna of values, %dav, is aligned with the histogram %dac,
 *          and is needed for fast lookup.  It is a hash set, because
 *          the values are unique.
 *      (3) Lookup is simple:
 *              l_dnaFindValByHash(dav, dahash, val, &index);
 *              if (index >= 0)
 *                  l_dnaGetIValue(dac, index, &icount);
 *              else
 *                  icount = 0;
 *      (4) All three output pointers are required.  Those that are
 *          supplied are set to NULL before any error return.
 * </pre>
 */
l_ok
l_dnaMakeHistoByHash(L_DNA       *das,
                     L_DNAHASH  **pdahash,
                     L_DNA      **pdav,
                     L_DNA      **pdac)
{
l_int32     i, n, nitems, index, count;
l_uint32    nsize;
l_uint64    key;
l_float64   val;
L_DNA      *dac, *dav;
L_DNAHASH  *dahash;

    PROCNAME("l_dnaMakeHistoByHash");

    if (pdahash) *pdahash = NULL;
    if (pdac) *pdac = NULL;
    if (pdav) *pdav = NULL;
    if (!pdahash || !pdac || !pdav)
        return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1);
    if (!das)
        return ERROR_INT("das not defined", procName, 1);
    if ((n = l_dnaGetCount(das)) == 0)
        return ERROR_INT("no data in das", procName, 1);

    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */
    dahash = l_dnaHashCreate(nsize, 8);
    dac = l_dnaCreate(n);  /* histogram */
    dav = l_dnaCreate(n);  /* the values */
    for (i = 0, nitems = 0; i < n; i++) {
        l_dnaGetDValue(das, i, &val);
            /* Is this value already stored in dav? */
        l_dnaFindValByHash(dav, dahash, val, &index);
        if (index >= 0) {  /* found */
                /* The index argument is an integer; only the stored
                 * count is a double. */
            l_dnaGetIValue(dac, index, &count);
            l_dnaSetValue(dac, index, (l_float64)(count + 1));
        } else {  /* not found */
                /* nitems is the position this value will take in
                 * both dav and dac. */
            l_hashFloat64ToUint64(nsize, val, &key);
            l_dnaHashAdd(dahash, key, (l_float64)nitems);
            l_dnaAddNumber(dav, val);
            l_dnaAddNumber(dac, 1);
            nitems++;
        }
    }

    *pdahash = dahash;
    *pdac = dac;
    *pdav = dav;
    return 0;
}
コード例 #6
0
ファイル: dnahash.c プロジェクト: chewi/leptonica
/*!
 * \brief   l_dnaRemoveDupsByHash()
 *
 * \param[in]    das      source array of numbers
 * \param[out]   pdad     hash set: the distinct numbers of %das, in
 *                        order of first appearance
 * \param[out]   pdahash  [optional] dnahash used for lookup
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates a dna with unique values.
 *      (2) The dnahash is built up together with dad to assure
 *          uniqueness.  It stores indices into dad, and can be used
 *          to find if an element is in the set:
 *              l_dnaFindValByHash(dad, dahash, val, &index)
 *      (3) If %pdahash is not supplied, the dnahash is destroyed
 *          before returning.
 * </pre>
 */
l_ok
l_dnaRemoveDupsByHash(L_DNA       *das,
                      L_DNA      **pdad,
                      L_DNAHASH  **pdahash)
{
l_int32     k, count, found, nunique;
l_uint32    nprime;
l_uint64    hashkey;
l_float64   dval;
L_DNA      *dad;
L_DNAHASH  *dahash;

    PROCNAME("l_dnaRemoveDupsByHash");

    if (pdahash != NULL)
        *pdahash = NULL;
    if (pdad == NULL)
        return ERROR_INT("&dad not defined", procName, 1);
    *pdad = NULL;
    if (das == NULL)
        return ERROR_INT("das not defined", procName, 1);

    count = l_dnaGetCount(das);
    findNextLargerPrime(count / 20, &nprime);  /* buckets in hash table */
    dahash = l_dnaHashCreate(nprime, 8);
    dad = l_dnaCreate(count);
    *pdad = dad;
    nunique = 0;
    for (k = 0; k < count; k++) {
        l_dnaGetDValue(das, k, &dval);
        l_dnaFindValByHash(dad, dahash, dval, &found);
        if (found >= 0)
            continue;  /* already in dad */

            /* nunique is the index this number is about to get in dad */
        l_hashFloat64ToUint64(nprime, dval, &hashkey);
        l_dnaHashAdd(dahash, hashkey, (l_float64)nunique);
        l_dnaAddNumber(dad, dval);
        nunique++;
    }

        /* Hand the hash to the caller if requested; otherwise free it */
    if (pdahash == NULL)
        l_dnaHashDestroy(&dahash);
    else
        *pdahash = dahash;
    return 0;
}
コード例 #7
0
ファイル: dnabasic.c プロジェクト: stweil/leptonica-old
/*!
 *  l_dnaConvertToNuma()
 *
 *      Input:  da  (source array of doubles)
 *      Return: na  (new numa with one number per element of da, in
 *                   the same order), or null on error
 *
 *  Notes:
 *      (1) Each double is handed to numaAddNumber() unchanged.
 *          NOTE(review): a numa presumably stores numbers with less
 *          precision than a dna; confirm before relying on exact
 *          round trips.
 */
NUMA *
l_dnaConvertToNuma(L_DNA  *da)
{
l_int32    k, count;
l_float64  dval;
NUMA      *na;

    PROCNAME("l_dnaConvertToNuma");

    if (da == NULL)
        return (NUMA *)ERROR_PTR("da not defined", procName, NULL);

    count = l_dnaGetCount(da);
    na = numaCreate(count);
    k = 0;
    while (k < count) {
        l_dnaGetDValue(da, k, &dval);
        numaAddNumber(na, dval);
        k++;
    }
    return na;
}
コード例 #8
0
ファイル: dnafunc1.c プロジェクト: DanBloomberg/leptonica
/*!
 * \brief   l_dnaIntersectionByAset()
 *
 * \param[in]    da1, da2    the two arrays of numbers to intersect
 * \return  dad with the intersection of the two arrays, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See sarrayIntersection() for the approach.
 *      (2) Here, the key in building the sorted tree is the number itself.
 *      (3) Operations using an underlying tree are O(nlogn), which is
 *          typically less efficient than hashing, which is O(n).
 *      (4) The array with more elements is put into a set.  The other
 *          array is traversed in order, and each of its numbers that
 *          is in that set is added to %dad the first time it is met.
 *          If the counts are equal, %da1 is the one put into the set.
 * </pre>
 */
L_DNA *
l_dnaIntersectionByAset(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32    count1, count2, k, nshort;
l_float64  dval;
L_ASET    *setlong, *setseen;
RB_TYPE    key;
L_DNA     *dashort, *dalong, *dad;

    PROCNAME("l_dnaIntersectionByAset");

    if (da1 == NULL)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (da2 == NULL)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* The set is built from the array that has more elements.
         * dashort and dalong alias the inputs; do not destroy them. */
    count1 = l_dnaGetCount(da1);
    count2 = l_dnaGetCount(da2);
    if (count1 < count2) {
        dashort = da1;
        dalong = da2;
    } else {
        dashort = da2;
        dalong = da1;
    }
    setlong = l_asetCreateFromDna(dalong);

        /* Collect the numbers of dashort that are in setlong,
         * using setseen to skip repeats. */
    dad = l_dnaCreate(0);
    nshort = l_dnaGetCount(dashort);
    setseen = l_asetCreate(L_FLOAT_TYPE);
    for (k = 0; k < nshort; k++) {
        l_dnaGetDValue(dashort, k, &dval);
        key.ftype = dval;
        if (!l_asetFind(setlong, key))
            continue;  /* not in the other array */
        if (l_asetFind(setseen, key))
            continue;  /* already in the result */
        l_dnaAddNumber(dad, dval);
        l_asetInsert(setseen, key);
    }

    l_asetDestroy(&setlong);
    l_asetDestroy(&setseen);
    return dad;
}
コード例 #9
0
ファイル: dnahash.c プロジェクト: chewi/leptonica
/*!
 * \brief   l_dnaFindValByHash()
 *
 * \param[in]    da       array of numbers being searched
 * \param[in]    dahash   containing indices into %da
 * \param[in]    val      searching for this number in %da
 * \param[out]   pindex   index into %da if found; -1 otherwise,
 *                        including on error
 * \return  0 if OK (whether or not %val was found); 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Algo: hash %val into a key; use the key to get the dna
 *                in %dahash that holds indices into %da; go through
 *                those indices, reading the number at each one from
 *                %da, and stop at the first that equals %val.
 *      (2) If %dahash has no dna for the key, %val is not in %da.
 *          This is not an error.
 * </pre>
 */
l_ok
l_dnaFindValByHash(L_DNA      *da,
                   L_DNAHASH  *dahash,
                   l_float64   val,
                   l_int32    *pindex)
{
l_int32    k, nb, ncand, candidate;
l_float64  stored;
l_uint64   hashkey;
L_DNA     *daindex;

    PROCNAME("l_dnaFindValByHash");

    if (pindex == NULL)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (da == NULL)
        return ERROR_INT("da not defined", procName, 1);
    if (dahash == NULL)
        return ERROR_INT("dahash not defined", procName, 1);

        /* Requested with L_NOCOPY, and not destroyed in this function */
    nb = l_dnaHashGetCount(dahash);
    l_hashFloat64ToUint64(nb, val, &hashkey);
    daindex = l_dnaHashGetDna(dahash, hashkey, L_NOCOPY);
    if (daindex == NULL)
        return 0;

        /* Check each candidate index against the number stored in da */
    ncand = l_dnaGetCount(daindex);
    k = 0;
    while (k < ncand) {
        l_dnaGetIValue(daindex, k, &candidate);
        l_dnaGetDValue(da, candidate, &stored);
        if (stored == val) {
            *pindex = candidate;
            break;
        }
        k++;
    }

    return 0;
}
コード例 #10
0
ファイル: dnafunc1.c プロジェクト: DanBloomberg/leptonica
/*!
 * \brief   l_asetCreateFromDna()
 *
 * \param[in]    da    source dna
 * \return  set using the doubles in %da as keys, or NULL if %da is
 *              not defined
 *
 * <pre>
 * Notes:
 *      (1) Every element of %da is offered to the set, in order.
 * </pre>
 */
L_ASET *
l_asetCreateFromDna(L_DNA  *da)
{
l_int32    k, count;
l_float64  dval;
L_ASET    *set;
RB_TYPE    key;

    PROCNAME("l_asetCreateFromDna");

    if (da == NULL)
        return (L_ASET *)ERROR_PTR("da not defined", procName, NULL);

    set = l_asetCreate(L_FLOAT_TYPE);
    count = l_dnaGetCount(da);
    k = 0;
    while (k < count) {
        l_dnaGetDValue(da, k, &dval);
        key.ftype = dval;
        l_asetInsert(set, key);
        k++;
    }

    return set;
}