Example #1
0
/*!
 * \brief   recogGetClassIndex()
 *
 * \param[in]    recog with LUT's pre-computed
 * \param[in]    val integer value; can be up to 3 bytes for UTF-8
 * \param[in]    text text from which %val was derived; used if not found
 * \param[out]   pindex index into dna_tochar
 * \return  0 if found; 1 if not found and added; 2 on error.
 *
 * <pre>
 * Notes:
 *      (1) This is used during training.  There is one entry in
 *          recog->dna_tochar (integer value, e.g., ascii) and
 *          one in recog->sa_text (e.g, ascii letter in a string)
 *          for each character class.
 *      (2) This searches the dna character array for %val.  If it is
 *          not found, the template represents a character class not
 *          already seen: it increments setsize (the number of character
 *          classes) by 1, and augments both the index (dna_tochar)
 *          and text (sa_text) arrays.
 *      (3) Returns the index in &index, except on error.
 *      (4) Caller must check the function return value.
 * </pre>
 */
l_int32
recogGetClassIndex(L_RECOG  *recog,
                   l_int32   val,
                   char     *text,
                   l_int32  *pindex)
{
l_int32  i, n, ival;

    PROCNAME("recogGetClassIndex");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 2);
    *pindex = -1;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 2);
    if (!text)
        return ERROR_INT("text not defined", procName, 2);

        /* Search existing characters */
    n = l_dnaGetCount(recog->dna_tochar);
    for (i = 0; i < n; i++) {
        l_dnaGetIValue(recog->dna_tochar, i, &ival);
        if (val == ival) {  /* found */
            *pindex = i;
            return 0;
        }
    }

       /* If not found... */
    l_dnaAddNumber(recog->dna_tochar, val);
    sarrayAddString(recog->sa_text, text, L_COPY);
    recog->setsize++;
    *pindex = n;
    return 1;
}
Example #2
0
/*!
 * \brief   l_dnaMakeHistoByHash()
 *
 * \param[in]    das
 * \param[out]   pdahash hash map: val --> index
 * \param[out]   pdav array of values: index --> val
 * \param[out]   pdac histo array of counts: index --> count
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates and returns a dna of occurrences (histogram),
 *          an aligned dna of values, and an associated hashmap.
 *          The hashmap takes %dav and a value, and points into the
 *          histogram in %dac.
 *      (2) The dna of values, %dav, is aligned with the histogram %dac,
 *          and is needed for fast lookup.  It is a hash set, because
 *          the values are unique.
 *      (3) Lookup is simple:
 *              l_dnaFindValByHash(dav, dahash, val, &index);
 *              if (index >= 0)
 *                  l_dnaGetIValue(dac, index, &icount);
 *              else
 *                  icount = 0;
 * </pre>
 */
l_ok
l_dnaMakeHistoByHash(L_DNA       *das,
                     L_DNAHASH  **pdahash,
                     L_DNA      **pdav,
                     L_DNA      **pdac)
{
l_int32     i, n, nitems, index, count;
l_uint32    nsize;
l_uint64    key;
l_float64   val;
L_DNA      *dac, *dav;
L_DNAHASH  *dahash;

    PROCNAME("l_dnaMakeHistoByHash");

    if (pdahash) *pdahash = NULL;
    if (pdac) *pdac = NULL;
    if (pdav) *pdav = NULL;
    if (!pdahash || !pdac || !pdav)
        return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1);
    if (!das)
        return ERROR_INT("das not defined", procName, 1);
    if ((n = l_dnaGetCount(das)) == 0)
        return ERROR_INT("no data in das", procName, 1);

    findNextLargerPrime(n / 20, &nsize);  /* buckets in hash table */
    dahash = l_dnaHashCreate(nsize, 8);
    dac = l_dnaCreate(n);  /* histogram */
    dav = l_dnaCreate(n);  /* the values */
    for (i = 0, nitems = 0; i < n; i++) {
        l_dnaGetDValue(das, i, &val);
            /* Is this value already stored in dav? */
        l_dnaFindValByHash(dav, dahash, val, &index);
        if (index >= 0) {  /* found */
            l_dnaGetIValue(dac, (l_float64)index, &count);
            l_dnaSetValue(dac, (l_float64)index, count + 1);
        } else {  /* not found */
            l_hashFloat64ToUint64(nsize, val, &key);
            l_dnaHashAdd(dahash, key, (l_float64)nitems);
            l_dnaAddNumber(dav, val);
            l_dnaAddNumber(dac, 1);
            nitems++;
        }
    }

    *pdahash = dahash;
    *pdac = dac;
    *pdav = dav;
    return 0;
}
Example #3
0
/*!
 *  l_dnaMakeDelta()
 *
 *      Input:  das (input l_dna)
 *      Return: dad (of difference values val[i+1] - val[i]),
 *                   or null on error
 */
L_DNA *
l_dnaMakeDelta(L_DNA  *das)
{
l_int32  i, n, prev, cur;
L_DNA   *dad;

    PROCNAME("l_dnaMakeDelta");

    if (!das)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);
    n = l_dnaGetCount(das);
    dad = l_dnaCreate(n - 1);
    prev = 0;
    for (i = 1; i < n; i++) {
        l_dnaGetIValue(das, i, &cur);
        l_dnaAddNumber(dad, cur - prev);
        prev = cur;
    }
    return dad;
}
Example #4
0
/*!
 *  l_dnaGetIArray()
 *
 *      Input:  da
 *      Return: a copy of the bare internal array, integerized
 *              by rounding, or null on error
 *  Notes:
 *      (1) A copy of the array is made, because we need to
 *          generate an integer array from the bare double array.
 *          The caller is responsible for freeing the array.
 *      (2) The array size is determined by the number of stored numbers,
 *          not by the size of the allocated array in the l_dna.
 *      (3) This function is provided to simplify calculations
 *          using the bare internal array, rather than continually
 *          calling accessors on the l_dna.  It is typically used
 *          on an array of size 256.
 */
l_int32 *
l_dnaGetIArray(L_DNA  *da)
{
l_int32   i, n, ival;
l_int32  *array;

    PROCNAME("l_dnaGetIArray");

    if (!da)
        return (l_int32 *)ERROR_PTR("da not defined", procName, NULL);

    n = l_dnaGetCount(da);
    if ((array = (l_int32 *)CALLOC(n, sizeof(l_int32))) == NULL)
        return (l_int32 *)ERROR_PTR("array not made", procName, NULL);
    for (i = 0; i < n; i++) {
        l_dnaGetIValue(da, i, &ival);
        array[i] = ival;
    }

    return array;
}
Example #5
0
/*!
 * \brief   l_dnaFindValByHash()
 *
 * \param[in]    da
 * \param[in]    dahash containing indices into %da
 * \param[in]    val  searching for this number in %da
 * \param[out]   pindex index into da if found; -1 otherwise
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Algo: hash %val into a key; hash the key to get the dna
 *                in %dahash (that holds indices into %da); traverse
 *                the dna of indices looking for %val in %da.
 * </pre>
 */
l_ok
l_dnaFindValByHash(L_DNA      *da,
                   L_DNAHASH  *dahash,
                   l_float64   val,
                   l_int32    *pindex)
{
l_int32    i, nbuckets, nvals, indexval;
l_float64  vali;
l_uint64   key;
L_DNA     *da1;

    PROCNAME("l_dnaFindValByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

    nbuckets = l_dnaHashGetCount(dahash);
    l_hashFloat64ToUint64(nbuckets, val, &key);
    da1 = l_dnaHashGetDna(dahash, key, L_NOCOPY);
    if (!da1) return 0;

        /* Run through da1, looking for this %val */
    nvals = l_dnaGetCount(da1);
    for (i = 0; i < nvals; i++) {
        l_dnaGetIValue(da1, i, &indexval);
        l_dnaGetDValue(da, indexval, &vali);
        if (val == vali) {
            *pindex = indexval;
            return 0;
        }
    }

    return 0;
}
Example #6
0
/*!
 *  ptaFindPtByHash()
 *
 *      Input:  pta
 *              dahash (built from pta)
 *              x, y  (arbitrary points)
 *              &index (<return> index into pta if (x,y) is in pta;
 *                      -1 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Fast lookup in dnaHash associated with a pta, to see if a
 *          random point (x,y) is already stored in the hash table.
 */
l_int32
ptaFindPtByHash(PTA        *pta,
                L_DNAHASH  *dahash,
                l_int32     x,
                l_int32     y,
                l_int32    *pindex)
{
l_int32   i, nbuckets, nvals, index, xi, yi;
l_uint64  key;
L_DNA    *da;

    PROCNAME("ptaFindPtByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!pta)
        return ERROR_INT("pta not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

    nbuckets = l_dnaHashGetCount(dahash);
    l_hashPtToUint64Fast(nbuckets, x, y, &key);
    da = l_dnaHashGetDna(dahash, key, L_NOCOPY);
    if (!da) return 0;

        /* Run through the da, looking for this point */
    nvals = l_dnaGetCount(da);
    for (i = 0; i < nvals; i++) {
        l_dnaGetIValue(da, i, &index);
        ptaGetIPt(pta, index, &xi, &yi);
        if (x == xi && y == yi) {
            *pindex = index;
            return 0;
        }
    }

    return 0;
}
/*!
 * \brief   sarrayFindStringByHash()
 *
 * \param[in]    sa
 * \param[in]    dahash built from sa
 * \param[in]    str  arbitrary string
 * \param[out]   pindex index into %sa if %str is in %sa;
 *              -1 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Fast lookup in dnaHash associated with a sarray, to see if a
 *          random string %str is already stored in the hash table.
 * </pre>
 */
l_int32
sarrayFindStringByHash(SARRAY      *sa,
                       L_DNAHASH   *dahash,
                       const char  *str,
                       l_int32     *pindex)
{
char     *stri;
l_int32   i, nvals, index;
l_uint64  key;
L_DNA    *da;

    PROCNAME("sarrayFindStringByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

    l_hashStringToUint64(str, &key);
    da = l_dnaHashGetDna(dahash, key, L_NOCOPY);
    if (!da) return 0;

        /* Run through the da, looking for this string */
    nvals = l_dnaGetCount(da);
    for (i = 0; i < nvals; i++) {
        l_dnaGetIValue(da, i, &index);
        stri = sarrayGetString(sa, index, L_NOCOPY);
        if (!strcmp(str, stri)) {  /* duplicate */
            *pindex = index;
            return 0;
        }
    }

    return 0;
}
Example #8
0
main(int    argc,
     char **argv)
{
l_int32       i, nbins, ival;
l_float64     pi, angle, val, sum;
L_DNA        *da1, *da2, *da3, *da4, *da5;
L_DNAA       *daa1, *daa2;
GPLOT        *gplot;
NUMA         *na, *nahisto, *nax;
L_REGPARAMS  *rp;

    if (regTestSetup(argc, argv, &rp))
        return 1;

    pi = 3.1415926535;
    da1 = l_dnaCreate(50);
    for (i = 0; i < 5000; i++) {
        angle = 0.02293 * i * pi;
        val = 999. * sin(angle);
        l_dnaAddNumber(da1, val);
    }

        /* Conversion to Numa; I/O for Dna */
    na = l_dnaConvertToNuma(da1);
    da2 = numaConvertToDna(na);
    l_dnaWrite("/tmp/dna1.da", da1);
    l_dnaWrite("/tmp/dna2.da", da2);
    da3 = l_dnaRead("/tmp/dna2.da");
    l_dnaWrite("/tmp/dna3.da", da3);
    regTestCheckFile(rp, "/tmp/dna1.da");  /* 0 */
    regTestCheckFile(rp, "/tmp/dna2.da");  /* 1 */
    regTestCheckFile(rp, "/tmp/dna3.da");  /* 2 */
    regTestCompareFiles(rp, 1, 2);  /* 3 */

        /* I/O for Dnaa */
    daa1 = l_dnaaCreate(3);
    l_dnaaAddDna(daa1, da1, L_INSERT);
    l_dnaaAddDna(daa1, da2, L_INSERT);
    l_dnaaAddDna(daa1, da3, L_INSERT);
    l_dnaaWrite("/tmp/dnaa1.daa", daa1);
    daa2 = l_dnaaRead("/tmp/dnaa1.daa");
    l_dnaaWrite("/tmp/dnaa2.daa", daa2);
    regTestCheckFile(rp, "/tmp/dnaa1.daa");  /* 4 */
    regTestCheckFile(rp, "/tmp/dnaa2.daa");  /* 5 */
    regTestCompareFiles(rp, 4, 5);  /* 6 */
    l_dnaaDestroy(&daa1);
    l_dnaaDestroy(&daa2);

        /* Just for fun -- is the numa ok? */
    nahisto = numaMakeHistogramClipped(na, 12, 2000);
    nbins = numaGetCount(nahisto);
    nax = numaMakeSequence(0, 1, nbins);
    gplot = gplotCreate("/tmp/historoot", GPLOT_PNG, "Histo example",
                        "i", "histo[i]");
    gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine");
    gplotMakeOutput(gplot);
#ifndef  _WIN32
    sleep(1);
#else
    Sleep(1000);
#endif  /* _WIN32 */
    regTestCheckFile(rp, "/tmp/historoot.png");  /* 7 */
    gplotDestroy(&gplot);
    numaDestroy(&na);
    numaDestroy(&nax);
    numaDestroy(&nahisto);

        /* Handling precision of int32 in double */
    da4 = l_dnaCreate(25);
    for (i = 0; i < 1000; i++)
        l_dnaAddNumber(da4, 1928374 * i);
    l_dnaWrite("/tmp/dna4.da", da4);
    da5 = l_dnaRead("/tmp/dna4.da");
    sum = 0;
    for (i = 0; i < 1000; i++) {
        l_dnaGetIValue(da5, i, &ival);
        sum += L_ABS(ival - i * 1928374);  /* we better be adding 0 each time */
    }
    regTestCompareValues(rp, sum, 0.0, 0.0);  /* 8 */
    l_dnaDestroy(&da4);
    l_dnaDestroy(&da5);

    return regTestCleanup(rp);
}