/*! * \brief recogGetClassIndex() * * \param[in] recog with LUT's pre-computed * \param[in] val integer value; can be up to 3 bytes for UTF-8 * \param[in] text text from which %val was derived; used if not found * \param[out] pindex index into dna_tochar * \return 0 if found; 1 if not found and added; 2 on error. * * <pre> * Notes: * (1) This is used during training. There is one entry in * recog->dna_tochar (integer value, e.g., ascii) and * one in recog->sa_text (e.g, ascii letter in a string) * for each character class. * (2) This searches the dna character array for %val. If it is * not found, the template represents a character class not * already seen: it increments setsize (the number of character * classes) by 1, and augments both the index (dna_tochar) * and text (sa_text) arrays. * (3) Returns the index in &index, except on error. * (4) Caller must check the function return value. * </pre> */ l_int32 recogGetClassIndex(L_RECOG *recog, l_int32 val, char *text, l_int32 *pindex) { l_int32 i, n, ival; PROCNAME("recogGetClassIndex"); if (!pindex) return ERROR_INT("&index not defined", procName, 2); *pindex = -1; if (!recog) return ERROR_INT("recog not defined", procName, 2); if (!text) return ERROR_INT("text not defined", procName, 2); /* Search existing characters */ n = l_dnaGetCount(recog->dna_tochar); for (i = 0; i < n; i++) { l_dnaGetIValue(recog->dna_tochar, i, &ival); if (val == ival) { /* found */ *pindex = i; return 0; } } /* If not found... */ l_dnaAddNumber(recog->dna_tochar, val); sarrayAddString(recog->sa_text, text, L_COPY); recog->setsize++; *pindex = n; return 1; }
/*! * \brief l_dnaMakeHistoByHash() * * \param[in] das * \param[out] pdahash hash map: val --> index * \param[out] pdav array of values: index --> val * \param[out] pdac histo array of counts: index --> count * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Generates and returns a dna of occurrences (histogram), * an aligned dna of values, and an associated hashmap. * The hashmap takes %dav and a value, and points into the * histogram in %dac. * (2) The dna of values, %dav, is aligned with the histogram %dac, * and is needed for fast lookup. It is a hash set, because * the values are unique. * (3) Lookup is simple: * l_dnaFindValByHash(dav, dahash, val, &index); * if (index >= 0) * l_dnaGetIValue(dac, index, &icount); * else * icount = 0; * </pre> */ l_ok l_dnaMakeHistoByHash(L_DNA *das, L_DNAHASH **pdahash, L_DNA **pdav, L_DNA **pdac) { l_int32 i, n, nitems, index, count; l_uint32 nsize; l_uint64 key; l_float64 val; L_DNA *dac, *dav; L_DNAHASH *dahash; PROCNAME("l_dnaMakeHistoByHash"); if (pdahash) *pdahash = NULL; if (pdac) *pdac = NULL; if (pdav) *pdav = NULL; if (!pdahash || !pdac || !pdav) return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1); if (!das) return ERROR_INT("das not defined", procName, 1); if ((n = l_dnaGetCount(das)) == 0) return ERROR_INT("no data in das", procName, 1); findNextLargerPrime(n / 20, &nsize); /* buckets in hash table */ dahash = l_dnaHashCreate(nsize, 8); dac = l_dnaCreate(n); /* histogram */ dav = l_dnaCreate(n); /* the values */ for (i = 0, nitems = 0; i < n; i++) { l_dnaGetDValue(das, i, &val); /* Is this value already stored in dav? */ l_dnaFindValByHash(dav, dahash, val, &index); if (index >= 0) { /* found */ l_dnaGetIValue(dac, (l_float64)index, &count); l_dnaSetValue(dac, (l_float64)index, count + 1); } else { /* not found */ l_hashFloat64ToUint64(nsize, val, &key); l_dnaHashAdd(dahash, key, (l_float64)nitems); l_dnaAddNumber(dav, val); l_dnaAddNumber(dac, 1); nitems++; } } *pdahash = dahash; *pdac = dac; *pdav = dav; return 0; }
/*! * l_dnaMakeDelta() * * Input: das (input l_dna) * Return: dad (of difference values val[i+1] - val[i]), * or null on error */ L_DNA * l_dnaMakeDelta(L_DNA *das) { l_int32 i, n, prev, cur; L_DNA *dad; PROCNAME("l_dnaMakeDelta"); if (!das) return (L_DNA *)ERROR_PTR("das not defined", procName, NULL); n = l_dnaGetCount(das); dad = l_dnaCreate(n - 1); prev = 0; for (i = 1; i < n; i++) { l_dnaGetIValue(das, i, &cur); l_dnaAddNumber(dad, cur - prev); prev = cur; } return dad; }
/*! * l_dnaGetIArray() * * Input: da * Return: a copy of the bare internal array, integerized * by rounding, or null on error * Notes: * (1) A copy of the array is made, because we need to * generate an integer array from the bare double array. * The caller is responsible for freeing the array. * (2) The array size is determined by the number of stored numbers, * not by the size of the allocated array in the l_dna. * (3) This function is provided to simplify calculations * using the bare internal array, rather than continually * calling accessors on the l_dna. It is typically used * on an array of size 256. */ l_int32 * l_dnaGetIArray(L_DNA *da) { l_int32 i, n, ival; l_int32 *array; PROCNAME("l_dnaGetIArray"); if (!da) return (l_int32 *)ERROR_PTR("da not defined", procName, NULL); n = l_dnaGetCount(da); if ((array = (l_int32 *)CALLOC(n, sizeof(l_int32))) == NULL) return (l_int32 *)ERROR_PTR("array not made", procName, NULL); for (i = 0; i < n; i++) { l_dnaGetIValue(da, i, &ival); array[i] = ival; } return array; }
/*! * \brief l_dnaFindValByHash() * * \param[in] da * \param[in] dahash containing indices into %da * \param[in] val searching for this number in %da * \param[out] pindex index into da if found; -1 otherwise * \return 0 if OK; 1 on error * * <pre> * Notes: * (1) Algo: hash %val into a key; hash the key to get the dna * in %dahash (that holds indices into %da); traverse * the dna of indices looking for %val in %da. * </pre> */ l_ok l_dnaFindValByHash(L_DNA *da, L_DNAHASH *dahash, l_float64 val, l_int32 *pindex) { l_int32 i, nbuckets, nvals, indexval; l_float64 vali; l_uint64 key; L_DNA *da1; PROCNAME("l_dnaFindValByHash"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = -1; if (!da) return ERROR_INT("da not defined", procName, 1); if (!dahash) return ERROR_INT("dahash not defined", procName, 1); nbuckets = l_dnaHashGetCount(dahash); l_hashFloat64ToUint64(nbuckets, val, &key); da1 = l_dnaHashGetDna(dahash, key, L_NOCOPY); if (!da1) return 0; /* Run through da1, looking for this %val */ nvals = l_dnaGetCount(da1); for (i = 0; i < nvals; i++) { l_dnaGetIValue(da1, i, &indexval); l_dnaGetDValue(da, indexval, &vali); if (val == vali) { *pindex = indexval; return 0; } } return 0; }
/*! * ptaFindPtByHash() * * Input: pta * dahash (built from pta) * x, y (arbitrary points) * &index (<return> index into pta if (x,y) is in pta; * -1 otherwise) * Return: 0 if OK, 1 on error * * Notes: * (1) Fast lookup in dnaHash associated with a pta, to see if a * random point (x,y) is already stored in the hash table. */ l_int32 ptaFindPtByHash(PTA *pta, L_DNAHASH *dahash, l_int32 x, l_int32 y, l_int32 *pindex) { l_int32 i, nbuckets, nvals, index, xi, yi; l_uint64 key; L_DNA *da; PROCNAME("ptaFindPtByHash"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = -1; if (!pta) return ERROR_INT("pta not defined", procName, 1); if (!dahash) return ERROR_INT("dahash not defined", procName, 1); nbuckets = l_dnaHashGetCount(dahash); l_hashPtToUint64Fast(nbuckets, x, y, &key); da = l_dnaHashGetDna(dahash, key, L_NOCOPY); if (!da) return 0; /* Run through the da, looking for this point */ nvals = l_dnaGetCount(da); for (i = 0; i < nvals; i++) { l_dnaGetIValue(da, i, &index); ptaGetIPt(pta, index, &xi, &yi); if (x == xi && y == yi) { *pindex = index; return 0; } } return 0; }
/*! * \brief sarrayFindStringByHash() * * \param[in] sa * \param[in] dahash built from sa * \param[in] str arbitrary string * \param[out] pindex index into %sa if %str is in %sa; * -1 otherwise * \return 0 if OK, 1 on error * * <pre> * Notes: * (1) Fast lookup in dnaHash associated with a sarray, to see if a * random string %str is already stored in the hash table. * </pre> */ l_int32 sarrayFindStringByHash(SARRAY *sa, L_DNAHASH *dahash, const char *str, l_int32 *pindex) { char *stri; l_int32 i, nvals, index; l_uint64 key; L_DNA *da; PROCNAME("sarrayFindStringByHash"); if (!pindex) return ERROR_INT("&index not defined", procName, 1); *pindex = -1; if (!sa) return ERROR_INT("sa not defined", procName, 1); if (!dahash) return ERROR_INT("dahash not defined", procName, 1); l_hashStringToUint64(str, &key); da = l_dnaHashGetDna(dahash, key, L_NOCOPY); if (!da) return 0; /* Run through the da, looking for this string */ nvals = l_dnaGetCount(da); for (i = 0; i < nvals; i++) { l_dnaGetIValue(da, i, &index); stri = sarrayGetString(sa, index, L_NOCOPY); if (!strcmp(str, stri)) { /* duplicate */ *pindex = index; return 0; } } return 0; }
main(int argc, char **argv) { l_int32 i, nbins, ival; l_float64 pi, angle, val, sum; L_DNA *da1, *da2, *da3, *da4, *da5; L_DNAA *daa1, *daa2; GPLOT *gplot; NUMA *na, *nahisto, *nax; L_REGPARAMS *rp; if (regTestSetup(argc, argv, &rp)) return 1; pi = 3.1415926535; da1 = l_dnaCreate(50); for (i = 0; i < 5000; i++) { angle = 0.02293 * i * pi; val = 999. * sin(angle); l_dnaAddNumber(da1, val); } /* Conversion to Numa; I/O for Dna */ na = l_dnaConvertToNuma(da1); da2 = numaConvertToDna(na); l_dnaWrite("/tmp/dna1.da", da1); l_dnaWrite("/tmp/dna2.da", da2); da3 = l_dnaRead("/tmp/dna2.da"); l_dnaWrite("/tmp/dna3.da", da3); regTestCheckFile(rp, "/tmp/dna1.da"); /* 0 */ regTestCheckFile(rp, "/tmp/dna2.da"); /* 1 */ regTestCheckFile(rp, "/tmp/dna3.da"); /* 2 */ regTestCompareFiles(rp, 1, 2); /* 3 */ /* I/O for Dnaa */ daa1 = l_dnaaCreate(3); l_dnaaAddDna(daa1, da1, L_INSERT); l_dnaaAddDna(daa1, da2, L_INSERT); l_dnaaAddDna(daa1, da3, L_INSERT); l_dnaaWrite("/tmp/dnaa1.daa", daa1); daa2 = l_dnaaRead("/tmp/dnaa1.daa"); l_dnaaWrite("/tmp/dnaa2.daa", daa2); regTestCheckFile(rp, "/tmp/dnaa1.daa"); /* 4 */ regTestCheckFile(rp, "/tmp/dnaa2.daa"); /* 5 */ regTestCompareFiles(rp, 4, 5); /* 6 */ l_dnaaDestroy(&daa1); l_dnaaDestroy(&daa2); /* Just for fun -- is the numa ok? */ nahisto = numaMakeHistogramClipped(na, 12, 2000); nbins = numaGetCount(nahisto); nax = numaMakeSequence(0, 1, nbins); gplot = gplotCreate("/tmp/historoot", GPLOT_PNG, "Histo example", "i", "histo[i]"); gplotAddPlot(gplot, nax, nahisto, GPLOT_LINES, "sine"); gplotMakeOutput(gplot); #ifndef _WIN32 sleep(1); #else Sleep(1000); #endif /* _WIN32 */ regTestCheckFile(rp, "/tmp/historoot.png"); /* 7 */ gplotDestroy(&gplot); numaDestroy(&na); numaDestroy(&nax); numaDestroy(&nahisto); /* Handling precision of int32 in double */ da4 = l_dnaCreate(25); for (i = 0; i < 1000; i++) l_dnaAddNumber(da4, 1928374 * i); l_dnaWrite("/tmp/dna4.da", da4); da5 = l_dnaRead("/tmp/dna4.da"); sum = 0; for (i = 0; i < 1000; i++) { l_dnaGetIValue(da5, i, &ival); sum += L_ABS(ival - i * 1928374); /* we better be adding 0 each time */ } regTestCompareValues(rp, sum, 0.0, 0.0); /* 8 */ l_dnaDestroy(&da4); l_dnaDestroy(&da5); return regTestCleanup(rp); }