/*!
 * \brief   l_dnaIntersectionByHash()
 *
 * \param[in]    da1, da2    the two number arrays to intersect
 * \return  dad   new array holding each common value once, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The intersection set is built the same way as in
 *          ptaIntersectionByHash() and sarrayIntersectionByHash().
 *      (2) The array with the larger count is indexed by one dnahash.
 *          The other array is then scanned in order; a second dnahash
 *          records the values already emitted so that each common
 *          value is added to %dad only once.
 *      (3) When both inputs have the same count, %da2 is the array
 *          that is scanned and %da1 is the one that is indexed.
 *      (4) Neither input is modified or destroyed.
 * </pre>
 */
L_DNA *
l_dnaIntersectionByHash(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32     count1, count2, nscan, nbins, k, ibig, iseen;
l_uint32    nprime;
l_uint64    hkey;
l_float64   num;
L_DNAHASH  *hash_big, *hash_seen;
L_DNA      *da_scan, *da_index, *dad;

    PROCNAME("l_dnaIntersectionByHash");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* Index the bigger array; scan the smaller one.  Both are
         * borrowed from the caller and must not be destroyed here. */
    count1 = l_dnaGetCount(da1);
    count2 = l_dnaGetCount(da2);
    if (count1 < count2) {
        da_scan = da1;
        da_index = da2;
    } else {
        da_scan = da2;
        da_index = da1;
    }
    hash_big = l_dnaHashCreateFromDna(da_index);

        /* A value goes into %dad when it is present in the indexed
         * array (hash_big) and has not been emitted before (hash_seen). */
    dad = l_dnaCreate(0);
    nscan = l_dnaGetCount(da_scan);
    findNextLargerPrime(nscan / 20, &nprime);  /* buckets in hash table */
    hash_seen = l_dnaHashCreate(nprime, 0);
    nbins = l_dnaHashGetCount(hash_seen);
    for (k = 0; k < nscan; k++) {
        l_dnaGetDValue(da_scan, k, &num);
        l_dnaFindValByHash(da_index, hash_big, num, &ibig);
        if (ibig < 0)  /* not in the other array */
            continue;
        l_dnaFindValByHash(da_scan, hash_seen, num, &iseen);
        if (iseen != -1)  /* already emitted */
            continue;
        l_dnaAddNumber(dad, num);
        l_hashFloat64ToUint64(nbins, num, &hkey);
        l_dnaHashAdd(hash_seen, hkey, (l_float64)k);
    }

    l_dnaHashDestroy(&hash_big);
    l_dnaHashDestroy(&hash_seen);
    return dad;
}
/*!
 * \brief   recogGetClassIndex()
 *
 * \param[in]    recog     with LUT's pre-computed
 * \param[in]    val       integer value; can be up to 3 bytes for UTF-8
 * \param[in]    text      text from which %val was derived; used if not found
 * \param[out]   pindex    index into dna_tochar
 * \return  0 if found; 1 if not found and added; 2 on error.
 *
 * <pre>
 * Notes:
 *      (1) Used during training.  Each character class has one entry
 *          in recog->dna_tochar (the integer value) and one entry in
 *          recog->sa_text (the text string).
 *      (2) recog->dna_tochar is searched linearly for %val.  On a miss,
 *          %val is appended to dna_tochar, a copy of %text is appended
 *          to sa_text, and recog->setsize is incremented; the returned
 *          index is the position of the new entry.
 *      (3) *pindex is set to -1 on error.
 *      (4) Caller must check the function return value.
 * </pre>
 */
l_int32
recogGetClassIndex(L_RECOG  *recog,
                   l_int32   val,
                   char     *text,
                   l_int32  *pindex)
{
l_int32  k, nclasses, stored;

    PROCNAME("recogGetClassIndex");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 2);
    *pindex = -1;
    if (!recog)
        return ERROR_INT("recog not defined", procName, 2);
    if (!text)
        return ERROR_INT("text not defined", procName, 2);

        /* Look for an existing class with this value */
    nclasses = l_dnaGetCount(recog->dna_tochar);
    k = 0;
    while (k < nclasses) {
        l_dnaGetIValue(recog->dna_tochar, k, &stored);
        if (stored == val) {
            *pindex = k;
            return 0;
        }
        k++;
    }

        /* New class: extend both parallel arrays */
    l_dnaAddNumber(recog->dna_tochar, val);
    sarrayAddString(recog->sa_text, text, L_COPY);
    recog->setsize++;
    *pindex = nclasses;
    return 1;
}
/*!
 * \brief   l_dnaRemoveDupsByAset()
 *
 * \param[in]    das    source array, possibly with repeated values
 * \return  dad   new array with duplicates removed, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) The first occurrence of each value is kept, so %dad
 *          preserves the order in which values first appear in %das.
 *      (2) A set keyed on the values tracks what has been seen.
 * </pre>
 */
L_DNA *
l_dnaRemoveDupsByAset(L_DNA  *das)
{
l_int32    k, count;
l_float64  num;
L_DNA     *dad;
L_ASET    *seen;
RB_TYPE    rbkey;

    PROCNAME("l_dnaRemoveDupsByAset");

    if (!das)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);

    seen = l_asetCreate(L_FLOAT_TYPE);
    dad = l_dnaCreate(0);
    count = l_dnaGetCount(das);
    for (k = 0; k < count; k++) {
        l_dnaGetDValue(das, k, &num);
        rbkey.ftype = num;
        if (l_asetFind(seen, rbkey))  /* duplicate; skip */
            continue;
        l_dnaAddNumber(dad, num);
        l_asetInsert(seen, rbkey);
    }

    l_asetDestroy(&seen);
    return dad;
}
/*!
 *  l_dnaJoin()
 *
 *      Input:  dad  (dest dna; numbers are appended to this one)
 *              das  (<optional> source dna; numbers are read from this one)
 *              istart  (starting index in das)
 *              iend  (ending index in das, inclusive; use -1 to cat all)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) istart < 0 is taken to mean 'read from the start' (istart = 0)
 *      (2) iend < 0, or iend beyond the last element, means
 *          'read to the end'
 *      (3) if das == NULL, this is a no-op
 *      (4) If, after clipping, istart > iend, nothing is appended and
 *          an error is returned.  This includes the case of an
 *          empty das.
 */
l_int32
l_dnaJoin(L_DNA   *dad,
          L_DNA   *das,
          l_int32  istart,
          l_int32  iend)
{
l_int32    nsrc, first, last, k;
l_float64  num;

    PROCNAME("l_dnaJoin");

    if (!dad)
        return ERROR_INT("dad not defined", procName, 1);
    if (!das)
        return 0;

        /* Clip the requested range to what das actually holds */
    first = (istart < 0) ? 0 : istart;
    nsrc = l_dnaGetCount(das);
    last = (iend < 0 || iend >= nsrc) ? nsrc - 1 : iend;
    if (first > last)
        return ERROR_INT("istart > iend; nothing to add", procName, 1);

    k = first;
    while (k <= last) {
        l_dnaGetDValue(das, k, &num);
        l_dnaAddNumber(dad, num);
        k++;
    }

    return 0;
}
/*!
 *  l_dnaWriteStream()
 *
 *      Input:  stream  (open for writing)
 *              da
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Writes a version line, the count, and one line per number.
 *      (2) The startx/delx parameter line is optional: it is written
 *          only when the parameters differ from (0.0, 1.0).
 */
l_int32
l_dnaWriteStream(FILE   *fp,
                 L_DNA  *da)
{
l_int32    k, count;
l_float64  xstart, xdelta;

    PROCNAME("l_dnaWriteStream");

    if (!fp)
        return ERROR_INT("stream not defined", procName, 1);
    if (!da)
        return ERROR_INT("da not defined", procName, 1);

    count = l_dnaGetCount(da);
    fprintf(fp, "\nL_Dna Version %d\n", DNA_VERSION_NUMBER);
    fprintf(fp, "Number of numbers = %d\n", count);
    k = 0;
    while (k < count) {
        fprintf(fp, " [%d] = %lf\n", k, da->array[k]);
        k++;
    }
    fprintf(fp, "\n");

        /* Optional data: only emitted for non-default parameters */
    l_dnaGetParameters(da, &xstart, &xdelta);
    if (!(xstart == 0.0 && xdelta == 1.0))
        fprintf(fp, "startx = %lf, delx = %lf\n", xstart, xdelta);

    return 0;
}
/*!
 * \brief   l_dnaHashCreateFromDna()
 *
 * \param[in]    da    array whose values are to be indexed
 * \return  dahash if OK; NULL on error
 *
 * <pre>
 * Notes:
 *      (1) What is stored in %dahash are indices into %da, not the
 *          values themselves; %dahash has no use without %da.
 *      (2) Every element of %da gets an entry, so duplicated values
 *          in %da produce multiple entries.
 *      (3) The number of buckets requested is the next prime larger
 *          than n / 20, where n is the count of %da.
 * </pre>
 */
L_DNAHASH *
l_dnaHashCreateFromDna(L_DNA  *da)
{
l_int32     k, count;
l_uint32    nbuckets;
l_uint64    hkey;
l_float64   num;
L_DNAHASH  *hash;

    PROCNAME("l_dnaHashCreateFromDna");

    if (!da)
        return (L_DNAHASH *)ERROR_PTR("da not defined", procName, NULL);

    count = l_dnaGetCount(da);
    findNextLargerPrime(count / 20, &nbuckets);  /* buckets in hash table */
    hash = l_dnaHashCreate(nbuckets, 8);

    k = 0;
    while (k < count) {
        l_dnaGetDValue(da, k, &num);
        l_hashFloat64ToUint64(nbuckets, num, &hkey);
        l_dnaHashAdd(hash, hkey, (l_float64)k);  /* store the index */
        k++;
    }

    return hash;
}
/*!
 * \brief   l_dnaIntersectionByAset()
 *
 * \param[in]    da1, da2    the two number arrays to intersect
 * \return  dad   new array with the intersection of the two arrays,
 *                or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) See sarrayIntersection() for the approach.
 *      (2) Here, the key in building the sorted tree is the number itself.
 *      (3) Operations using an underlying tree are O(nlogn), which is
 *          typically less efficient than hashing, which is O(n).
 *      (4) The array with the larger count is loaded into a set; the
 *          other array is scanned in order and each value found in
 *          that set is emitted once.  With equal counts, %da2 is the
 *          array that is scanned.
 * </pre>
 */
L_DNA *
l_dnaIntersectionByAset(L_DNA  *da1,
                        L_DNA  *da2)
{
l_int32    count1, count2, k, nscan;
l_float64  num;
L_ASET    *set_big, *set_seen;
RB_TYPE    rbkey;
L_DNA     *da_scan, *da_index, *dad;

    PROCNAME("l_dnaIntersectionByAset");

    if (!da1)
        return (L_DNA *)ERROR_PTR("da1 not defined", procName, NULL);
    if (!da2)
        return (L_DNA *)ERROR_PTR("da2 not defined", procName, NULL);

        /* The bigger array becomes the lookup set.  Both arrays are
         * borrowed from the caller and must not be destroyed here. */
    count1 = l_dnaGetCount(da1);
    count2 = l_dnaGetCount(da2);
    if (count1 < count2) {
        da_scan = da1;
        da_index = da2;
    } else {
        da_scan = da2;
        da_index = da1;
    }
    set_big = l_asetCreateFromDna(da_index);

        /* Emit each scanned value that is in set_big and is new */
    dad = l_dnaCreate(0);
    nscan = l_dnaGetCount(da_scan);
    set_seen = l_asetCreate(L_FLOAT_TYPE);
    for (k = 0; k < nscan; k++) {
        l_dnaGetDValue(da_scan, k, &num);
        rbkey.ftype = num;
        if (!l_asetFind(set_big, rbkey))
            continue;
        if (l_asetFind(set_seen, rbkey))
            continue;
        l_dnaAddNumber(dad, num);
        l_asetInsert(set_seen, rbkey);
    }

    l_asetDestroy(&set_big);
    l_asetDestroy(&set_seen);
    return dad;
}
/*!
 *  l_dnaaGetDnaCount()
 *
 *      Input:  daa
 *              index  (of the l_dna within daa)
 *      Return: count of numbers in the selected l_dna, or 0 on error.
 *
 *  Notes:
 *      (1) The index must be in [0 ... daa->n - 1].
 */
l_int32
l_dnaaGetDnaCount(L_DNAA   *daa,
                  l_int32   index)
{
L_DNA  *da;

    PROCNAME("l_dnaaGetDnaCount");

    if (!daa)
        return ERROR_INT("daa not defined", procName, 0);
    if (index >= daa->n || index < 0)
        return ERROR_INT("invalid index into daa", procName, 0);

    da = daa->dna[index];  /* borrowed; not a new reference */
    return l_dnaGetCount(da);
}
/*!
 * \brief   l_dnaMakeHistoByHash()
 *
 * \param[in]    das       source array; must not be empty
 * \param[out]   pdahash   hash map: val --> index
 * \param[out]   pdav      array of values: index --> val
 * \param[out]   pdac      histo array of counts: index --> count
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Produces three aligned results: %dav holds each distinct
 *          value of %das in order of first appearance, %dac holds the
 *          number of times that value occurs, and %dahash maps a
 *          value to its index in %dav (and hence in %dac).
 *      (2) All three output pointers are required.  Each one that is
 *          supplied is set to NULL before any error return.
 *      (3) Lookup is simple:
 *              l_dnaFindValByHash(dav, dahash, val, &index);
 *              if (index >= 0)
 *                  l_dnaGetIValue(dac, index, &icount);
 *              else
 *                  icount = 0;
 * </pre>
 */
l_ok
l_dnaMakeHistoByHash(L_DNA       *das,
                     L_DNAHASH  **pdahash,
                     L_DNA      **pdav,
                     L_DNA      **pdac)
{
l_int32     k, nsrc, ndistinct, slot, tally;
l_uint32    nbuckets;
l_uint64    hkey;
l_float64   num;
L_DNA      *counts, *values;
L_DNAHASH  *hash;

    PROCNAME("l_dnaMakeHistoByHash");

    if (pdahash) *pdahash = NULL;
    if (pdac) *pdac = NULL;
    if (pdav) *pdav = NULL;
    if (!pdahash || !pdac || !pdav)
        return ERROR_INT("&dahash, &dac, &dav not all defined", procName, 1);
    if (!das)
        return ERROR_INT("das not defined", procName, 1);
    nsrc = l_dnaGetCount(das);
    if (nsrc == 0)
        return ERROR_INT("no data in das", procName, 1);

    findNextLargerPrime(nsrc / 20, &nbuckets);  /* buckets in hash table */
    hash = l_dnaHashCreate(nbuckets, 8);
    counts = l_dnaCreate(nsrc);  /* histogram */
    values = l_dnaCreate(nsrc);  /* the distinct values */
    ndistinct = 0;
    for (k = 0; k < nsrc; k++) {
        l_dnaGetDValue(das, k, &num);
            /* Has this value already been stored in values? */
        l_dnaFindValByHash(values, hash, num, &slot);
        if (slot < 0) {  /* first sighting: open a new histogram slot */
            l_hashFloat64ToUint64(nbuckets, num, &hkey);
            l_dnaHashAdd(hash, hkey, (l_float64)ndistinct);
            l_dnaAddNumber(values, num);
            l_dnaAddNumber(counts, 1);
            ndistinct++;
        } else {  /* seen before: bump its count */
                /* NOTE(review): the index casts below look redundant if
                 * these accessors take an integer index -- confirm. */
            l_dnaGetIValue(counts, (l_float64)slot, &tally);
            l_dnaSetValue(counts, (l_float64)slot, tally + 1);
        }
    }

    *pdahash = hash;
    *pdac = counts;
    *pdav = values;
    return 0;
}
/*!
 *  l_dnaAddNumber()
 *
 *      Input:  da
 *              val  (float or int to be added; stored as a float64)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The number is appended after the last stored element and
 *          the count is incremented.
 *      (2) When the array is full it is first grown with
 *          l_dnaExtendArray().  If that fails, nothing is stored and
 *          an error is returned; without this check the store below
 *          would go beyond the allocated array.
 *      (3) NOTE(review): this relies on l_dnaExtendArray() returning
 *          nonzero on failure, like the other status-returning
 *          functions here -- confirm against its definition.
 */
l_int32
l_dnaAddNumber(L_DNA     *da,
               l_float64  val)
{
l_int32  n;

    PROCNAME("l_dnaAddNumber");

    if (!da)
        return ERROR_INT("da not defined", procName, 1);

    n = l_dnaGetCount(da);
    if (n >= da->nalloc) {
        if (l_dnaExtendArray(da))
            return ERROR_INT("extension failed", procName, 1);
    }
    da->array[n] = val;
    da->n++;
    return 0;
}
/*!
 *  l_dnaReplaceNumber()
 *
 *      Input:  da
 *              index  (of the element to be overwritten)
 *              val  (new value that replaces the old one)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) The index must refer to an element that is already stored,
 *          i.e., it must be in [0 ... n - 1].  The count is unchanged.
 */
l_int32
l_dnaReplaceNumber(L_DNA     *da,
                   l_int32    index,
                   l_float64  val)
{
l_int32  count;

    PROCNAME("l_dnaReplaceNumber");

    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    count = l_dnaGetCount(da);
    if (!(index >= 0 && index < count))
        return ERROR_INT("index not in {0...n - 1}", procName, 1);

    da->array[index] = val;
    return 0;
}
/*!
 *  l_dnaRemoveNumber()
 *
 *      Input:  da
 *              index  (of the element to be removed)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Every element after %index is moved down by one position,
 *          da[i] --> da[i - 1], and the count is decremented.
 *      (2) It should not be used repeatedly on large arrays,
 *          because the function is O(n).
 */
l_int32
l_dnaRemoveNumber(L_DNA   *da,
                  l_int32  index)
{
l_int32     k, count;
l_float64  *vals;

    PROCNAME("l_dnaRemoveNumber");

    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    count = l_dnaGetCount(da);
    if (!(index >= 0 && index < count))
        return ERROR_INT("index not in {0...n - 1}", procName, 1);

        /* Close the gap left by the removed element */
    vals = da->array;
    for (k = index; k + 1 < count; k++)
        vals[k] = vals[k + 1];
    da->n--;
    return 0;
}
/*!
 * \brief   l_dnaRemoveDupsByHash()
 *
 * \param[in]    das       source array, possibly with repeated values
 * \param[out]   pdad      hash set: the distinct values of %das
 * \param[out]   pdahash   [optional] dnahash used for lookup
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Generates a dna with unique values, in order of first
 *          appearance in %das.
 *      (2) The dnahash is built up together with dad to assure
 *          uniqueness.  If requested, it is returned and can be used
 *          to find if an element is in the set:
 *              l_dnaFindValByHash(dad, dahash, val, &index)
 *          Otherwise it is destroyed before returning.
 * </pre>
 */
l_ok
l_dnaRemoveDupsByHash(L_DNA       *das,
                      L_DNA      **pdad,
                      L_DNAHASH  **pdahash)
{
l_int32     k, nsrc, slot, nkept;
l_uint32    nbuckets;
l_uint64    hkey;
l_float64   num;
L_DNA      *uniq;
L_DNAHASH  *hash;

    PROCNAME("l_dnaRemoveDupsByHash");

    if (pdahash) *pdahash = NULL;
    if (!pdad)
        return ERROR_INT("&dad not defined", procName, 1);
    *pdad = NULL;
    if (!das)
        return ERROR_INT("das not defined", procName, 1);

    nsrc = l_dnaGetCount(das);
    findNextLargerPrime(nsrc / 20, &nbuckets);  /* buckets in hash table */
    hash = l_dnaHashCreate(nbuckets, 8);
    uniq = l_dnaCreate(nsrc);
    *pdad = uniq;
    nkept = 0;
    for (k = 0; k < nsrc; k++) {
        l_dnaGetDValue(das, k, &num);
        l_dnaFindValByHash(uniq, hash, num, &slot);
        if (slot >= 0)  /* already in the set */
            continue;
        l_hashFloat64ToUint64(nbuckets, num, &hkey);
        l_dnaHashAdd(hash, hkey, (l_float64)nkept);
        l_dnaAddNumber(uniq, num);
        nkept++;
    }

    if (!pdahash)
        l_dnaHashDestroy(&hash);
    else
        *pdahash = hash;
    return 0;
}
/*!
 * \brief   l_dnaHashGetTotalCount()
 *
 * \param[in]    dahash
 * \return  n   total number of numbers over all dna in %dahash,
 *              or 0 on error
 *
 * <pre>
 * Notes:
 *      (1) Buckets for which no dna is returned contribute nothing.
 * </pre>
 */
l_int32
l_dnaHashGetTotalCount(L_DNAHASH  *dahash)
{
l_int32  ibucket, total;
L_DNA   *bucket;

    PROCNAME("l_dnaHashGetTotalCount");

    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 0);

    total = 0;
    for (ibucket = 0; ibucket < dahash->nbuckets; ibucket++) {
        bucket = l_dnaHashGetDna(dahash, ibucket, L_NOCOPY);
        if (!bucket)
            continue;
        total += l_dnaGetCount(bucket);
    }

    return total;
}
/*!
 *  l_dnaMakeDelta()
 *
 *      Input:  das (input l_dna)
 *      Return: dad (of difference values val[i+1] - val[i]),
 *                   or null on error
 *
 *  Notes:
 *      (1) The values are read with l_dnaGetIValue(), so the
 *          differences are computed on integers.
 *      (2) For n input values there are n - 1 differences; with fewer
 *          than two input values the returned dna is empty.
 *      (3) The running 'previous' value starts from val[0].  Starting
 *          it from 0 would make the first output val[1] - 0, which is
 *          only right when val[0] happens to be 0.
 */
L_DNA *
l_dnaMakeDelta(L_DNA  *das)
{
l_int32  i, n, prev, cur;
L_DNA   *dad;

    PROCNAME("l_dnaMakeDelta");

    if (!das)
        return (L_DNA *)ERROR_PTR("das not defined", procName, NULL);

    n = l_dnaGetCount(das);
    dad = l_dnaCreate(n - 1);
    prev = 0;
    if (n > 0)
        l_dnaGetIValue(das, 0, &prev);  /* seed with the first value */
    for (i = 1; i < n; i++) {
        l_dnaGetIValue(das, i, &cur);
        l_dnaAddNumber(dad, cur - prev);
        prev = cur;
    }
    return dad;
}
/*!
 *  l_dnaConvertToNuma()
 *
 *      Input:  da
 *      Return: na (new numa holding the same sequence of numbers),
 *              or null on error
 *
 *  Notes:
 *      (1) Each number is read as a float64 from da and appended
 *          to the numa in the same order.
 */
NUMA *
l_dnaConvertToNuma(L_DNA  *da)
{
l_int32    k, count;
l_float64  num;
NUMA      *na;

    PROCNAME("l_dnaConvertToNuma");

    if (!da)
        return (NUMA *)ERROR_PTR("da not defined", procName, NULL);

    count = l_dnaGetCount(da);
    na = numaCreate(count);
    k = 0;
    while (k < count) {
        l_dnaGetDValue(da, k, &num);
        numaAddNumber(na, num);
        k++;
    }
    return na;
}
/*!
 *  l_dnaaGetNumberCount()
 *
 *      Input:  daa
 *      Return: count (total number of numbers over every l_dna
 *                     in the l_dnaa), or 0 if no numbers or on error
 *
 *  Notes:
 *      (1) Each l_dna is obtained as a clone and released again
 *          after its count has been added.
 */
l_int32
l_dnaaGetNumberCount(L_DNAA  *daa)
{
L_DNA   *member;
l_int32  ndna, total, k;

    PROCNAME("l_dnaaGetNumberCount");

    if (!daa)
        return ERROR_INT("daa not defined", procName, 0);

    ndna = l_dnaaGetCount(daa);
    total = 0;
    k = 0;
    while (k < ndna) {
        member = l_dnaaGetDna(daa, k, L_CLONE);
        total += l_dnaGetCount(member);
        l_dnaDestroy(&member);
        k++;
    }

    return total;
}
/*!
 *  l_dnaGetIArray()
 *
 *      Input:  da
 *      Return: a newly allocated integer copy of the stored numbers,
 *              or null on error
 *
 *  Notes:
 *      (1) A copy is always made, because the internal array holds
 *          doubles and the result holds integers.  Each element is
 *          obtained with l_dnaGetIValue().
 *          The caller is responsible for freeing the array.
 *      (2) The array size is determined by the number of stored numbers,
 *          not by the size of the allocated array in the l_dna.
 *      (3) This function is provided to simplify calculations
 *          using a bare array, rather than continually
 *          calling accessors on the l_dna.  It is typically used
 *          on an array of size 256.
 */
l_int32 *
l_dnaGetIArray(L_DNA  *da)
{
l_int32   k, count, ival;
l_int32  *iarray;

    PROCNAME("l_dnaGetIArray");

    if (!da)
        return (l_int32 *)ERROR_PTR("da not defined", procName, NULL);

    count = l_dnaGetCount(da);
    iarray = (l_int32 *)CALLOC(count, sizeof(l_int32));
    if (iarray == NULL)
        return (l_int32 *)ERROR_PTR("array not made", procName, NULL);

    k = 0;
    while (k < count) {
        l_dnaGetIValue(da, k, &ival);
        iarray[k] = ival;
        k++;
    }
    return iarray;
}
/*!
 *  ptaFindPtByHash()
 *
 *      Input:  pta
 *              dahash (built from pta)
 *              x, y  (arbitrary point)
 *              &index (<return> index into pta if (x,y) is in pta;
 *                      -1 otherwise)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) Fast lookup in the dnaHash associated with a pta, to see if
 *          an arbitrary point (x,y) is already stored in the hash table.
 *      (2) The point is hashed to select one dna of candidate indices
 *          into pta; each candidate point is then compared with (x,y).
 *      (3) Not finding the point is not an error: the function
 *          returns 0 with *pindex == -1.
 */
l_int32
ptaFindPtByHash(PTA        *pta,
                L_DNAHASH  *dahash,
                l_int32     x,
                l_int32     y,
                l_int32    *pindex)
{
l_int32   k, nbins, ncand, ipt, px, py;
l_uint64  hkey;
L_DNA    *cands;

    PROCNAME("ptaFindPtByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!pta)
        return ERROR_INT("pta not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

    nbins = l_dnaHashGetCount(dahash);
    l_hashPtToUint64Fast(nbins, x, y, &hkey);
    cands = l_dnaHashGetDna(dahash, hkey, L_NOCOPY);
    if (!cands) return 0;

        /* Compare (x,y) with each candidate point */
    ncand = l_dnaGetCount(cands);
    for (k = 0; k < ncand; k++) {
        l_dnaGetIValue(cands, k, &ipt);
        ptaGetIPt(pta, ipt, &px, &py);
        if (px != x || py != y)
            continue;
        *pindex = ipt;
        break;
    }

    return 0;
}
/*!
 * \brief   l_dnaFindValByHash()
 *
 * \param[in]    da
 * \param[in]    dahash    containing indices into %da
 * \param[in]    val       searching for this number in %da
 * \param[out]   pindex    index into da if found; -1 otherwise
 * \return  0 if OK; 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Algo: hash %val into a key; use the key to get the dna
 *          in %dahash that holds candidate indices into %da; traverse
 *          those indices, comparing the number stored in %da at each
 *          one with %val.  The first exact match is returned.
 *      (2) Not finding %val is not an error: the function returns 0
 *          with *pindex == -1.
 * </pre>
 */
l_ok
l_dnaFindValByHash(L_DNA      *da,
                   L_DNAHASH  *dahash,
                   l_float64   val,
                   l_int32    *pindex)
{
l_int32    k, nbins, ncand, icand;
l_float64  stored;
l_uint64   hkey;
L_DNA     *cands;

    PROCNAME("l_dnaFindValByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);

    nbins = l_dnaHashGetCount(dahash);
    l_hashFloat64ToUint64(nbins, val, &hkey);
    cands = l_dnaHashGetDna(dahash, hkey, L_NOCOPY);
    if (!cands) return 0;

        /* Check each candidate index for an exact match with %val */
    ncand = l_dnaGetCount(cands);
    for (k = 0; k < ncand; k++) {
        l_dnaGetIValue(cands, k, &icand);
        l_dnaGetDValue(da, icand, &stored);
        if (stored == val) {
            *pindex = icand;
            break;
        }
    }

    return 0;
}
/*!
 * \brief   sarrayFindStringByHash()
 *
 * \param[in]    sa
 * \param[in]    dahash    built from sa
 * \param[in]    str       arbitrary string; must not be NULL
 * \param[out]   pindex    index into %sa if %str is in %sa;
 *                         -1 otherwise
 * \return  0 if OK, 1 on error
 *
 * <pre>
 * Notes:
 *      (1) Fast lookup in dnaHash associated with a sarray, to see if a
 *          random string %str is already stored in the hash table.
 *      (2) The string is hashed to select one dna of candidate indices
 *          into %sa; each candidate string is compared with %str.
 *      (3) Not finding %str is not an error: the function returns 0
 *          with *pindex == -1.
 *      (4) %str is validated like the other arguments because it is
 *          passed to strcmp(), which must not receive a null pointer.
 *          For the same reason, a candidate for which no string is
 *          returned is skipped rather than compared.
 * </pre>
 */
l_int32
sarrayFindStringByHash(SARRAY      *sa,
                       L_DNAHASH   *dahash,
                       const char  *str,
                       l_int32     *pindex)
{
char     *stri;
l_int32   i, nvals, index;
l_uint64  key;
L_DNA    *da;

    PROCNAME("sarrayFindStringByHash");

    if (!pindex)
        return ERROR_INT("&index not defined", procName, 1);
    *pindex = -1;
    if (!sa)
        return ERROR_INT("sa not defined", procName, 1);
    if (!dahash)
        return ERROR_INT("dahash not defined", procName, 1);
    if (!str)
        return ERROR_INT("str not defined", procName, 1);

    l_hashStringToUint64(str, &key);
    da = l_dnaHashGetDna(dahash, key, L_NOCOPY);
    if (!da) return 0;

        /* Run through the da, looking for this string */
    nvals = l_dnaGetCount(da);
    for (i = 0; i < nvals; i++) {
        l_dnaGetIValue(da, i, &index);
        stri = sarrayGetString(sa, index, L_NOCOPY);
        if (stri && !strcmp(str, stri)) {  /* duplicate */
            *pindex = index;
            return 0;
        }
    }

    return 0;
}
/*!
 * \brief   l_asetCreateFromDna()
 *
 * \param[in]    da    source dna
 * \return  set   using the doubles in %da as keys, or NULL on error
 *
 * <pre>
 * Notes:
 *      (1) Every number in %da is offered to the set as a float key.
 * </pre>
 */
L_ASET *
l_asetCreateFromDna(L_DNA  *da)
{
l_int32    k, count;
l_float64  num;
L_ASET    *set;
RB_TYPE    rbkey;

    PROCNAME("l_asetCreateFromDna");

    if (!da)
        return (L_ASET *)ERROR_PTR("da not defined", procName, NULL);

    set = l_asetCreate(L_FLOAT_TYPE);
    count = l_dnaGetCount(da);
    k = 0;
    while (k < count) {
        l_dnaGetDValue(da, k, &num);
        rbkey.ftype = num;
        l_asetInsert(set, rbkey);
        k++;
    }

    return set;
}
/*!
 *  l_dnaInsertNumber()
 *
 *      Input:  da
 *              index (location in da to insert new value)
 *              val  (float64 or integer to be added)
 *      Return: 0 if OK, 1 on error
 *
 *  Notes:
 *      (1) This shifts da[i] --> da[i + 1] for all i >= index,
 *          and then inserts val as da[index].
 *      (2) The index may equal the current count, in which case the
 *          value is appended.
 *      (3) It should not be used repeatedly on large arrays,
 *          because the function is O(n).
 *      (4) When the array is full it is first grown with
 *          l_dnaExtendArray().  If that fails, the array is left
 *          unchanged and an error is returned; without this check the
 *          shift below would write beyond the allocated array.
 *      (5) NOTE(review): this relies on l_dnaExtendArray() returning
 *          nonzero on failure, like the other status-returning
 *          functions here -- confirm against its definition.
 */
l_int32
l_dnaInsertNumber(L_DNA      *da,
                  l_int32    index,
                  l_float64  val)
{
l_int32  i, n;

    PROCNAME("l_dnaInsertNumber");

    if (!da)
        return ERROR_INT("da not defined", procName, 1);
    n = l_dnaGetCount(da);
    if (index < 0 || index > n)
        return ERROR_INT("index not in {0...n}", procName, 1);

    if (n >= da->nalloc) {
        if (l_dnaExtendArray(da))
            return ERROR_INT("extension failed", procName, 1);
    }

        /* Open a slot at %index by moving the tail up one place */
    for (i = n; i > index; i--)
        da->array[i] = da->array[i - 1];
    da->array[index] = val;
    da->n++;
    return 0;
}
/*!
 *  l_dnaGetDArray()
 *
 *      Input:  da
 *              copyflag (L_NOCOPY or L_COPY)
 *      Return: either the bare internal array or a copy of it,
 *              or null on error
 *
 *  Notes:
 *      (1) With L_NOCOPY the internal array itself is returned.  Any
 *          other flag value is treated as L_COPY: a new array holding
 *          the stored numbers is returned, which the caller is
 *          responsible for freeing.
 *      (2) Very important: for L_NOCOPY, any writes to the array
 *          will be in the l_dna.  Do not write beyond the size of
 *          the count field, because it will not be accessible
 *          from the l_dna!  If necessary, be sure to set the count
 *          field to a larger number (such as the alloc size)
 *          BEFORE calling this function.  Creating with
 *          l_dnaMakeConstant() is another way to insure full
 *          initialization.
 *      (3) The copy is sized by the count of stored numbers, not by
 *          the allocated size.
 */
l_float64 *
l_dnaGetDArray(L_DNA   *da,
               l_int32  copyflag)
{
l_int32     k, count;
l_float64  *dup;

    PROCNAME("l_dnaGetDArray");

    if (!da)
        return (l_float64 *)ERROR_PTR("da not defined", procName, NULL);

    if (copyflag == L_NOCOPY)
        return da->array;

        /* copyflag == L_COPY */
    count = l_dnaGetCount(da);
    dup = (l_float64 *)CALLOC(count, sizeof(l_float64));
    if (dup == NULL)
        return (l_float64 *)ERROR_PTR("array not made", procName, NULL);
    for (k = 0; k < count; k++)
        dup[k] = da->array[k];

    return dup;
}
l_int32 main(int argc, char **argv) { L_ASET *set; L_DNA *da1, *da2, *da3, *da4, *da5, *da6, *da7, *da8, *dav, *dac; L_DNAHASH *dahash; NUMA *nav, *nac; PTA *pta1, *pta2, *pta3; SARRAY *sa1, *sa2, *sa3, *sa4; lept_mkdir("lept/hash"); #if 1 /* Test string hashing with aset */ fprintf(stderr, "Set results with string hashing:\n"); sa1 = BuildShortStrings(3, 0); sa2 = BuildShortStrings(3, 1); fprintf(stderr, " size with unique strings: %d\n", sarrayGetCount(sa1)); fprintf(stderr, " size with dups: %d\n", sarrayGetCount(sa2)); startTimer(); set = l_asetCreateFromSarray(sa2); fprintf(stderr, " time to make set: %5.3f sec\n", stopTimer()); fprintf(stderr, " size of set without dups: %d\n", l_asetSize(set)); l_asetDestroy(&set); startTimer(); sa3 = sarrayRemoveDupsByAset(sa2); fprintf(stderr, " time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", sarrayGetCount(sa3)); startTimer(); sa4 = sarrayIntersectionByAset(sa1, sa2); fprintf(stderr, " time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", sarrayGetCount(sa4)); sarrayDestroy(&sa3); sarrayDestroy(&sa4); /* Test sarray set operations with dna hash. * We use the same hash function as is used with aset. 
*/ fprintf(stderr, "\nDna hash results for sarray:\n"); fprintf(stderr, " size with unique strings: %d\n", sarrayGetCount(sa1)); fprintf(stderr, " size with dups: %d\n", sarrayGetCount(sa2)); startTimer(); dahash = l_dnaHashCreateFromSarray(sa2); fprintf(stderr, " time to make hashmap: %5.3f sec\n", stopTimer()); fprintf(stderr, " entries in hashmap with dups: %d\n", l_dnaHashGetTotalCount(dahash)); l_dnaHashDestroy(&dahash); startTimer(); sarrayRemoveDupsByHash(sa2, &sa3, NULL); fprintf(stderr, " time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", sarrayGetCount(sa3)); startTimer(); sa4 = sarrayIntersectionByHash(sa1, sa2); fprintf(stderr, " time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", sarrayGetCount(sa4)); sarrayDestroy(&sa3); sarrayDestroy(&sa4); sarrayDestroy(&sa1); sarrayDestroy(&sa2); #endif #if 1 /* Test point hashing with aset. * Enter all points within a 1500 x 1500 image in pta1, and include * 450,000 duplicates in pta2. With this pt hashing function, * there are no hash collisions among any of the 400 million pixel * locations in a 20000 x 20000 image. */ pta1 = BuildPointSet(1500, 1500, 0); pta2 = BuildPointSet(1500, 1500, 1); fprintf(stderr, "\nSet results for pta:\n"); fprintf(stderr, " pta1 size with unique points: %d\n", ptaGetCount(pta1)); fprintf(stderr, " pta2 size with dups: %d\n", ptaGetCount(pta2)); startTimer(); pta3 = ptaRemoveDupsByAset(pta2); fprintf(stderr, " Time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta3); startTimer(); pta3 = ptaIntersectionByAset(pta1, pta2); fprintf(stderr, " Time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta1); ptaDestroy(&pta2); ptaDestroy(&pta3); #endif #if 1 /* Test pta set operations with dna hash, using the same pt hashing * function. 
Although there are no collisions in 20K x 20K images, * the dna hash implementation works properly even if there are some. */ pta1 = BuildPointSet(1500, 1500, 0); pta2 = BuildPointSet(1500, 1500, 1); fprintf(stderr, "\nDna hash results for pta:\n"); fprintf(stderr, " pta1 size with unique points: %d\n", ptaGetCount(pta1)); fprintf(stderr, " pta2 size with dups: %d\n", ptaGetCount(pta2)); startTimer(); ptaRemoveDupsByHash(pta2, &pta3, NULL); fprintf(stderr, " Time to remove dups: %5.3f sec\n", stopTimer()); fprintf(stderr, " size without dups = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta3); startTimer(); pta3 = ptaIntersectionByHash(pta1, pta2); fprintf(stderr, " Time to intersect: %5.3f sec\n", stopTimer()); fprintf(stderr, " intersection size = %d\n", ptaGetCount(pta3)); ptaDestroy(&pta1); ptaDestroy(&pta2); ptaDestroy(&pta3); #endif /* Test dna set and histo operations using dna hash */ #if 1 fprintf(stderr, "\nDna hash results for dna:\n"); da1 = l_dnaMakeSequence(0.0, 0.125, 8000); da2 = l_dnaMakeSequence(300.0, 0.125, 8000); da3 = l_dnaMakeSequence(600.0, 0.125, 8000); da4 = l_dnaMakeSequence(900.0, 0.125, 8000); da5 = l_dnaMakeSequence(1200.0, 0.125, 8000); l_dnaJoin(da1, da2, 0, -1); l_dnaJoin(da1, da3, 0, -1); l_dnaJoin(da1, da4, 0, -1); l_dnaJoin(da1, da5, 0, -1); l_dnaRemoveDupsByHash(da1, &da6, &dahash); l_dnaHashDestroy(&dahash); fprintf(stderr, " dna size with dups = %d\n", l_dnaGetCount(da1)); fprintf(stderr, " dna size of unique numbers = %d\n", l_dnaGetCount(da6)); l_dnaMakeHistoByHash(da1, &dahash, &dav, &dac); nav = l_dnaConvertToNuma(dav); nac = l_dnaConvertToNuma(dac); fprintf(stderr, " dna number of histo points = %d\n", l_dnaGetCount(dac)); gplotSimpleXY1(nav, nac, GPLOT_IMPULSES, GPLOT_PNG, "/tmp/lept/hash/histo", "Histo"); da7 = l_dnaIntersectionByHash(da2, da3); fprintf(stderr, " dna number of points: da2 = %d, da3 = %d\n", l_dnaGetCount(da2), l_dnaGetCount(da3)); fprintf(stderr, " dna number of da2/da3 intersection points = %d\n", 
l_dnaGetCount(da7)); l_fileDisplay("/tmp/lept/hash/histo.png", 700, 100, 1.0); l_dnaDestroy(&da1); l_dnaDestroy(&da2); l_dnaDestroy(&da3); l_dnaDestroy(&da4); l_dnaDestroy(&da5); l_dnaDestroy(&da6); l_dnaDestroy(&da7); l_dnaDestroy(&dac); l_dnaDestroy(&dav); l_dnaHashDestroy(&dahash); numaDestroy(&nav); numaDestroy(&nac); #endif #if 1 da1 = l_dnaMakeSequence(0, 3, 10000); da2 = l_dnaMakeSequence(0, 5, 10000); da3 = l_dnaMakeSequence(0, 7, 10000); l_dnaJoin(da1, da2, 0, -1); l_dnaJoin(da1, da3, 0, -1); fprintf(stderr, "\nDna results using set:\n"); fprintf(stderr, " da1 count: %d\n", l_dnaGetCount(da1)); set = l_asetCreateFromDna(da1); fprintf(stderr, " da1 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da4 = l_dnaUnionByAset(da2, da3); fprintf(stderr, " da4 count: %d\n", l_dnaGetCount(da4)); set = l_asetCreateFromDna(da4); fprintf(stderr, " da4 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da5 = l_dnaIntersectionByAset(da1, da2); fprintf(stderr, " da5 count: %d\n", l_dnaGetCount(da5)); set = l_asetCreateFromDna(da5); fprintf(stderr, " da5 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da6 = l_dnaMakeSequence(100000, 11, 5000); l_dnaJoin(da6, da1, 0, -1); fprintf(stderr, " da6 count: %d\n", l_dnaGetCount(da6)); set = l_asetCreateFromDna(da6); fprintf(stderr, " da6 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da7 = l_dnaIntersectionByAset(da6, da3); fprintf(stderr, " da7 count: %d\n", l_dnaGetCount(da7)); set = l_asetCreateFromDna(da7); fprintf(stderr, " da7 set size: %d\n\n", l_asetSize(set)); l_asetDestroy(&set); da8 = l_dnaRemoveDupsByAset(da1); fprintf(stderr, " da8 count: %d\n\n", l_dnaGetCount(da8)); l_dnaDestroy(&da1); l_dnaDestroy(&da2); l_dnaDestroy(&da3); l_dnaDestroy(&da4); l_dnaDestroy(&da5); l_dnaDestroy(&da6); l_dnaDestroy(&da7); l_dnaDestroy(&da8); #endif return 0; }
int main(int argc, char **argv) { char *str; l_uint8 *data1, *data2; l_int32 i, n, same1, same2; size_t size1, size2, slice, total, start, end; FILE *fp; L_DNA *da; SARRAY *sa; L_BYTEA *lba1, *lba2, *lba3, *lba4, *lba5; static char mainName[] = "byteatest"; if (argc != 1) return ERROR_INT("syntax: byteatest", mainName, 1); lept_mkdir("bytea"); /* Test basic init, join and split */ lba1 = l_byteaInitFromFile("feyn.tif"); lba2 = l_byteaInitFromFile("test24.jpg"); size1 = l_byteaGetSize(lba1); size2 = l_byteaGetSize(lba2); l_byteaJoin(lba1, &lba2); lba3 = l_byteaInitFromMem(lba1->data, size1); lba4 = l_byteaInitFromMem(lba1->data + size1, size2); /* Split by hand */ l_binaryWrite("/tmp/bytea/junk1.dat", "w", lba3->data, lba3->size); l_binaryWrite("/tmp/bytea/junk2.dat", "w", lba4->data, lba4->size); filesAreIdentical("feyn.tif", "/tmp/bytea/junk1.dat", &same1); filesAreIdentical("test24.jpg", "/tmp/bytea/junk2.dat", &same2); if (same1 && same2) fprintf(stderr, "OK for join file\n"); else fprintf(stderr, "Error: files are different!\n"); /* Split by function */ l_byteaSplit(lba1, size1, &lba5); l_binaryWrite("/tmp/bytea/junk3.dat", "w", lba1->data, lba1->size); l_binaryWrite("/tmp/bytea/junk4.dat", "w", lba5->data, lba5->size); filesAreIdentical("feyn.tif", "/tmp/bytea/junk3.dat", &same1); filesAreIdentical("test24.jpg", "/tmp/bytea/junk4.dat", &same2); if (same1 && same2) fprintf(stderr, "OK for split file\n"); else fprintf(stderr, "Error: files are different!\n"); l_byteaDestroy(&lba1); l_byteaDestroy(&lba2); l_byteaDestroy(&lba3); l_byteaDestroy(&lba4); l_byteaDestroy(&lba5); /* Test appending with strings */ data1 = l_binaryRead("kernel_reg.c", &size1); sa = sarrayCreateLinesFromString((char *)data1, 1); lba1 = l_byteaCreate(0); n = sarrayGetCount(sa); for (i = 0; i < n; i++) { str = sarrayGetString(sa, i, L_NOCOPY); l_byteaAppendString(lba1, str); l_byteaAppendString(lba1, (char *)"\n"); } data2 = l_byteaGetData(lba1, &size2); l_binaryWrite("/tmp/bytea/junk5.dat", 
"w", data2, size2); filesAreIdentical("kernel_reg.c", "/tmp/bytea/junk5.dat", &same1); if (same1) fprintf(stderr, "OK for appended string data\n"); else fprintf(stderr, "Error: appended string data is different!\n"); lept_free(data1); sarrayDestroy(&sa); l_byteaDestroy(&lba1); /* Test appending with binary data */ slice = 1000; total = nbytesInFile("breviar-a38.jp2"); lba1 = l_byteaCreate(100); n = 1 + total / slice; fprintf(stderr, "******************************************************\n"); fprintf(stderr, "* Testing error checking: ignore two reported errors *\n"); for (i = 0, start = 0; i <= n; i++, start += slice) { data1 = l_binaryReadSelect("breviar-a38.jp2", start, slice, &size1); l_byteaAppendData(lba1, data1, size1); lept_free(data1); } fprintf(stderr, "******************************************************\n"); data2 = l_byteaGetData(lba1, &size2); l_binaryWrite("/tmp/bytea/junk6.dat", "w", data2, size2); filesAreIdentical("breviar-a38.jp2", "/tmp/bytea/junk6.dat", &same1); if (same1) fprintf(stderr, "OK for appended binary data\n"); else fprintf(stderr, "Error: appended binary data is different!\n"); l_byteaDestroy(&lba1); /* Test search */ convertToPdf("test24.jpg", L_JPEG_ENCODE, 0, "/tmp/bytea/junk7.pdf", 0, 0, 100, NULL, NULL, 0); lba1 = l_byteaInitFromFile("/tmp/bytea/junk7.pdf"); l_byteaFindEachSequence(lba1, (l_uint8 *)" 0 obj\n", 7, &da); /* l_dnaWriteStream(stderr, da); */ n = l_dnaGetCount(da); if (n == 6) fprintf(stderr, "OK for search: found 6 instances\n"); else fprintf(stderr, "Error in search: found %d instances, not 6\n", n); l_byteaDestroy(&lba1); l_dnaDestroy(&da); /* Test write to file */ lba1 = l_byteaInitFromFile("feyn.tif"); fp = lept_fopen("/tmp/bytea/junk8.dat", "wb"); size1 = l_byteaGetSize(lba1); for (start = 0; start < size1; start += 1000) { end = L_MIN(start + 1000 - 1, size1 - 1); l_byteaWriteStream(fp, lba1, start, end); } lept_fclose(fp); filesAreIdentical("feyn.tif", "/tmp/bytea/junk8.dat", &same1); if (same1) 
fprintf(stderr, "OK for written binary data\n"); else fprintf(stderr, "Error: written binary data is different!\n"); l_byteaDestroy(&lba1); return 0; }